Skip to content

Commit 2272cec

Browse files
Use unicodedata instead of eval.
1 parent c0a2689 commit 2272cec

File tree

1 file changed

+22
-26
lines changed

1 file changed

+22
-26
lines changed

Lib/sre_parse.py

Lines changed: 22 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
# XXX: show string offset and offending character for all errors
1414

1515
from sre_constants import *
16-
from ast import literal_eval
16+
import unicodedata
1717

1818
SPECIAL_CHARS = ".\\[{()*+?^$|"
1919
REPEAT_CHARS = "*+?{"
@@ -26,10 +26,6 @@
2626

2727
WHITESPACE = frozenset(" \t\n\r\v\f")
2828

29-
UNICODE_NAME = ASCIILETTERS | DIGITS | frozenset(' -')
30-
CLOSING_BRACE = frozenset("}")
31-
OPENING_BRACE = frozenset("{")
32-
3329

3430
_REPEATCODES = frozenset({MIN_REPEAT, MAX_REPEAT})
3531
_UNITCODES = frozenset({ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY})
@@ -270,19 +266,19 @@ def getwhile(self, n, charset):
270266
result += c
271267
self.__next()
272268
return result
273-
def getuntil(self, terminator):
269+
def getuntil(self, terminator, name):
274270
result = ''
275271
while True:
276272
c = self.next
277273
self.__next()
278274
if c is None:
279275
if not result:
280-
raise self.error("missing group name")
276+
raise self.error("missing " + name)
281277
raise self.error("missing %s, unterminated name" % terminator,
282278
len(result))
283279
if c == terminator:
284280
if not result:
285-
raise self.error("missing group name", 1)
281+
raise self.error("missing " + name, 1)
286282
break
287283
result += c
288284
return result
@@ -330,14 +326,14 @@ def _class_escape(source, escape):
330326
return LITERAL, c
331327
elif c == "N" and source.istext:
332328
# named unicode escape e.g. \N{EM DASH}
333-
escape += source.getwhile(1, OPENING_BRACE)
334-
escape += source.getwhile(100, UNICODE_NAME)
335-
escape += source.getwhile(1, CLOSING_BRACE)
329+
if not source.match('{'):
330+
raise source.error("missing {")
331+
charname = source.getuntil('}', 'character name')
336332
try:
337-
c = ord(literal_eval('"%s"' % escape))
338-
except SyntaxError:
339-
charname = escape[2:].strip('{}')
340-
raise source.error("unknown Unicode character name %s" % charname, len(escape))
333+
c = ord(unicodedata.lookup(charname))
334+
except KeyError:
335+
raise source.error("undefined character name %r" % charname,
336+
len(charname) + len(r'\N{}'))
341337
return LITERAL, c
342338
elif c in OCTDIGITS:
343339
# octal escape (up to three digits)
@@ -389,14 +385,14 @@ def _escape(source, escape, state):
389385
return LITERAL, c
390386
elif c == "N" and source.istext:
391387
# named unicode escape e.g. \N{EM DASH}
392-
escape += source.getwhile(1, OPENING_BRACE)
393-
escape += source.getwhile(100, UNICODE_NAME)
394-
escape += source.getwhile(1, CLOSING_BRACE)
388+
if not source.match('{'):
389+
raise source.error("missing {")
390+
charname = source.getuntil('}', 'character name')
395391
try:
396-
c = ord(literal_eval('"%s"' % escape))
397-
except SyntaxError:
398-
charname = escape[2:].strip('{}')
399-
raise source.error("unknown Unicode character name %s" % charname, len(escape))
392+
c = ord(unicodedata.lookup(charname))
393+
except KeyError:
394+
raise source.error("undefined character name %r" % charname,
395+
len(charname) + len(r'\N{}'))
400396
return LITERAL, c
401397
elif c == "0":
402398
# octal escape
@@ -707,13 +703,13 @@ def _parse(source, state, verbose, nested, first=False):
707703
# python extensions
708704
if sourcematch("<"):
709705
# named group: skip forward to end of name
710-
name = source.getuntil(">")
706+
name = source.getuntil(">", "group name")
711707
if not name.isidentifier():
712708
msg = "bad character in group name %r" % name
713709
raise source.error(msg, len(name) + 1)
714710
elif sourcematch("="):
715711
# named backreference
716-
name = source.getuntil(")")
712+
name = source.getuntil(")", "group name")
717713
if not name.isidentifier():
718714
msg = "bad character in group name %r" % name
719715
raise source.error(msg, len(name) + 1)
@@ -776,7 +772,7 @@ def _parse(source, state, verbose, nested, first=False):
776772

777773
elif char == "(":
778774
# conditional backreference group
779-
condname = source.getuntil(")")
775+
condname = source.getuntil(")", "group name")
780776
if condname.isidentifier():
781777
condgroup = state.groupdict.get(condname)
782778
if condgroup is None:
@@ -1005,7 +1001,7 @@ def addgroup(index, pos):
10051001
name = ""
10061002
if not s.match("<"):
10071003
raise s.error("missing <")
1008-
name = s.getuntil(">")
1004+
name = s.getuntil(">", "group name")
10091005
if name.isidentifier():
10101006
try:
10111007
index = groupindex[name]

0 commit comments

Comments (0)