15
15
import locale
16
16
import calendar
17
17
from re import compile as re_compile
18
+ from re import sub as re_sub
18
19
from re import IGNORECASE
19
20
from re import escape as re_escape
20
21
from datetime import (date as datetime_date ,
@@ -129,11 +130,23 @@ def __calc_date_time(self):
129
130
time_tuple = time .struct_time ((1999 ,3 ,17 ,22 ,44 ,55 ,2 ,76 ,0 ))
130
131
time_tuple2 = time .struct_time ((1999 ,1 ,3 ,1 ,1 ,1 ,6 ,3 ,0 ))
131
132
replacement_pairs = [
132
- ('1999' , '%Y' ), ('99' , '%y' ), ('22' , '%H' ),
133
- ('44' , '%M' ), ('55' , '%S' ), ('76' , '%j' ),
134
- ('17' , '%d' ), ('03' , '%m' ), ('3' , '%m' ),
135
- # '3' needed for when no leading zero.
136
- ('2' , '%w' ), ('10' , '%I' )]
133
+ ('1999' , '%Y' ), ('99' , '%y' ), ('22' , '%H' ),
134
+ ('44' , '%M' ), ('55' , '%S' ), ('76' , '%j' ),
135
+ ('17' , '%d' ), ('03' , '%m' ), ('3' , '%m' ),
136
+ # '3' needed for when no leading zero.
137
+ ('2' , '%w' ), ('10' , '%I' ),
138
+ # Non-ASCII digits
139
+ ('\u0661 \u0669 \u0669 \u0669 ' , '%Y' ),
140
+ ('\u0669 \u0669 ' , '%Oy' ),
141
+ ('\u0662 \u0662 ' , '%OH' ),
142
+ ('\u0664 \u0664 ' , '%OM' ),
143
+ ('\u0665 \u0665 ' , '%OS' ),
144
+ ('\u0661 \u0667 ' , '%Od' ),
145
+ ('\u0660 \u0663 ' , '%Om' ),
146
+ ('\u0663 ' , '%Om' ),
147
+ ('\u0662 ' , '%Ow' ),
148
+ ('\u0661 \u0660 ' , '%OI' ),
149
+ ]
137
150
date_time = []
138
151
for directive in ('%c' , '%x' , '%X' ):
139
152
current_format = time .strftime (directive , time_tuple ).lower ()
@@ -158,6 +171,10 @@ def __calc_date_time(self):
158
171
for tz in tz_values :
159
172
if tz :
160
173
current_format = current_format .replace (tz , "%Z" )
174
+ # Transform all non-ASCII digits to digits in range U+0660 to U+0669.
175
+ current_format = re_sub (r'\d(?<![0-9])' ,
176
+ lambda m : chr (0x0660 + int (m [0 ])),
177
+ current_format )
161
178
for old , new in replacement_pairs :
162
179
current_format = current_format .replace (old , new )
163
180
# If %W is used, then Sunday, 2005-01-03 will fall on week 0 since
@@ -267,7 +284,7 @@ def __init__(self, locale_time=None):
267
284
else :
268
285
self .locale_time = LocaleTime ()
269
286
base = super ()
270
- base . __init__ ( {
287
+ mapping = {
271
288
# The " [1-9]" part of the regex is to make %c from ANSI C work
272
289
'd' : r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])" ,
273
290
'f' : r"(?P<f>[0-9]{1,6})" ,
@@ -296,11 +313,15 @@ def __init__(self, locale_time=None):
296
313
'Z' : self .__seqToRE ((tz for tz_names in self .locale_time .timezone
297
314
for tz in tz_names ),
298
315
'Z' ),
299
- '%' : '%' })
300
- base .__setitem__ ('W' , base .__getitem__ ('U' ).replace ('U' , 'W' ))
301
- base .__setitem__ ('c' , self .pattern (self .locale_time .LC_date_time ))
302
- base .__setitem__ ('x' , self .pattern (self .locale_time .LC_date ))
316
+ '%' : '%' }
317
+ for d in 'dmyHIMS' :
318
+ mapping ['O' + d ] = r'(?P<%s>\d\d|\d| \d)' % d
319
+ mapping ['Ow' ] = r'(?P<w>\d)'
320
+ mapping ['W' ] = mapping ['U' ].replace ('U' , 'W' )
321
+ base .__init__ (mapping )
303
322
base .__setitem__ ('X' , self .pattern (self .locale_time .LC_time ))
323
+ base .__setitem__ ('x' , self .pattern (self .locale_time .LC_date ))
324
+ base .__setitem__ ('c' , self .pattern (self .locale_time .LC_date_time ))
304
325
305
326
def __seqToRE (self , to_convert , directive ):
306
327
"""Convert a list to a regex string for matching a directive.
@@ -328,28 +349,25 @@ def pattern(self, format):
328
349
regex syntax are escaped.
329
350
330
351
"""
331
- processed_format = ''
332
352
# The sub() call escapes all characters that might be misconstrued
333
353
# as regex syntax. Cannot use re.escape since we have to deal with
334
354
# format directives (%m, etc.).
335
- regex_chars = re_compile (r"([\\.^$*+?\(\){}\[\]|])" )
336
- format = regex_chars .sub (r"\\\1" , format )
337
- whitespace_replacement = re_compile (r'\s+' )
338
- format = whitespace_replacement .sub (r'\\s+' , format )
355
+ format = re_sub (r"([\\.^$*+?\(\){}\[\]|])" , r"\\\1" , format )
356
+ format = re_sub (r'\s+' , r'\\s+' , format )
357
+ format = re_sub (r"'" , "['\u02bc ]" , format ) # needed for br_FR
339
358
year_in_format = False
340
359
day_of_month_in_format = False
341
- while '%' in format :
342
- directive_index = format .index ('%' )+ 1
343
- format_char = format [directive_index ]
344
- processed_format = "%s%s%s" % (processed_format ,
345
- format [:directive_index - 1 ],
346
- self [format_char ])
347
- format = format [directive_index + 1 :]
360
+ def repl (m ):
361
+ format_char = m [1 ]
348
362
match format_char :
349
363
case 'Y' | 'y' | 'G' :
364
+ nonlocal year_in_format
350
365
year_in_format = True
351
366
case 'd' :
367
+ nonlocal day_of_month_in_format
352
368
day_of_month_in_format = True
369
+ return self [format_char ]
370
+ format = re_sub (r'%(O?.)' , repl , format )
353
371
if day_of_month_in_format and not year_in_format :
354
372
import warnings
355
373
warnings .warn ("""\
@@ -360,7 +378,7 @@ def pattern(self, format):
360
378
See https://github.com/python/cpython/issues/70647.""" ,
361
379
DeprecationWarning ,
362
380
skip_file_prefixes = (os .path .dirname (__file__ ),))
363
- return "%s%s" % ( processed_format , format )
381
+ return format
364
382
365
383
def compile (self , format ):
366
384
"""Return a compiled re object for the format string."""
@@ -434,8 +452,8 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
434
452
_regex_cache [format ] = format_regex
435
453
found = format_regex .match (data_string )
436
454
if not found :
437
- raise ValueError ("time data %r does not match format %r :: /%s/ " %
438
- (data_string , format , format_regex . pattern ))
455
+ raise ValueError ("time data %r does not match format %r" %
456
+ (data_string , format ))
439
457
if len (data_string ) != found .end ():
440
458
raise ValueError ("unconverted data remains: %s" %
441
459
data_string [found .end ():])
0 commit comments