Skip to content

Commit 4f17c5c

Browse files
authored
bpo-12915: Improve Unicode support for package names and attributes. (GH-18517)
1 parent e263bb1 commit 4f17c5c

File tree

2 files changed

+39
-6
lines changed

2 files changed

+39
-6
lines changed

Lib/pkgutil.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -638,8 +638,8 @@ def get_data(package, resource):
638638
return loader.get_data(resource_name)
639639

640640

641-
_DOTTED_WORDS = r'[a-z_]\w*(\.[a-z_]\w*)*'
642-
_NAME_PATTERN = re.compile(f'^({_DOTTED_WORDS})(:({_DOTTED_WORDS})?)?$', re.I)
641+
_DOTTED_WORDS = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
642+
_NAME_PATTERN = re.compile(f'^(?P<pkg>{_DOTTED_WORDS})(?P<cln>:(?P<obj>{_DOTTED_WORDS})?)?$', re.U)
643643
del _DOTTED_WORDS
644644

645645
def resolve_name(name):
@@ -677,11 +677,12 @@ def resolve_name(name):
677677
m = _NAME_PATTERN.match(name)
678678
if not m:
679679
raise ValueError(f'invalid format: {name!r}')
680-
groups = m.groups()
681-
if groups[2]:
680+
gd = m.groupdict()
681+
if gd.get('cln'):
682682
# there is a colon - a one-step import is all that's needed
683-
mod = importlib.import_module(groups[0])
684-
parts = groups[3].split('.') if groups[3] else []
683+
mod = importlib.import_module(gd['pkg'])
684+
parts = gd.get('obj')
685+
parts = parts.split('.') if parts else []
685686
else:
686687
# no colon - have to iterate to find the package boundary
687688
parts = name.split('.')

Lib/test/test_pkgutil.py

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -229,8 +229,40 @@ def test_name_resolution(self):
229229
('logging.handlers:SysLogHandler.NO_SUCH_VALUE', AttributeError),
230230
('logging.handlers.SysLogHandler.NO_SUCH_VALUE', AttributeError),
231231
('ZeroDivisionError', ImportError),
232+
('os.path.9abc', ValueError),
233+
('9abc', ValueError),
232234
)
233235

236+
# add some Unicode package names to the mix.
237+
238+
unicode_words = ('\u0935\u092e\u0938',
239+
'\xe9', '\xc8',
240+
'\uc548\ub155\ud558\uc138\uc694',
241+
'\u3055\u3088\u306a\u3089',
242+
'\u3042\u308a\u304c\u3068\u3046',
243+
'\u0425\u043e\u0440\u043e\u0448\u043e',
244+
'\u0441\u043f\u0430\u0441\u0438\u0431\u043e',
245+
'\u73b0\u4ee3\u6c49\u8bed\u5e38\u7528\u5b57\u8868')
246+
247+
for uw in unicode_words:
248+
d = os.path.join(self.dirname, uw)
249+
os.makedirs(d, exist_ok=True)
250+
# make an empty __init__.py file
251+
f = os.path.join(d, '__init__.py')
252+
with open(f, 'w') as f:
253+
f.write('')
254+
f.flush()
255+
# now import the package we just created; clearing the caches is
256+
# needed, otherwise the newly created package isn't found
257+
importlib.invalidate_caches()
258+
mod = importlib.import_module(uw)
259+
success_cases += (uw, mod),
260+
if len(uw) > 1:
261+
failure_cases += (uw[:-1], ImportError),
262+
263+
# add an example with a Unicode digit at the start
264+
failure_cases += ('\u0966\u0935\u092e\u0938', ValueError),
265+
234266
for s, expected in success_cases:
235267
with self.subTest(s=s):
236268
o = pkgutil.resolve_name(s)

0 commit comments

Comments
 (0)