Skip to content

Commit 5c55f50

Browse files
[3.11] [3.12] bpo-42663: Fix parsing TZ strings in zoneinfo module (GH-23825) (GH-110882) (GH-110889)
zoneinfo now supports the full range of values in the TZ string determined by RFC 8536 and detects all invalid formats. Both Python and C implementations now raise exceptions of the same type on invalid data. (cherry picked from commit ab08ff7) (cherry picked from commit 72b0f0e)
1 parent 44558a9 commit 5c55f50

File tree

4 files changed

+327
-259
lines changed

4 files changed

+327
-259
lines changed

Lib/test/test_zoneinfo/test_zoneinfo.py

Lines changed: 118 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -988,23 +988,114 @@ def test_tzstr_from_utc(self):
988988

989989
self.assertEqual(dt_act, dt_utc)
990990

991+
def test_extreme_tzstr(self):
992+
tzstrs = [
993+
# Extreme offset hour
994+
"AAA24",
995+
"AAA+24",
996+
"AAA-24",
997+
"AAA24BBB,J60/2,J300/2",
998+
"AAA+24BBB,J60/2,J300/2",
999+
"AAA-24BBB,J60/2,J300/2",
1000+
"AAA4BBB24,J60/2,J300/2",
1001+
"AAA4BBB+24,J60/2,J300/2",
1002+
"AAA4BBB-24,J60/2,J300/2",
1003+
# Extreme offset minutes
1004+
"AAA4:00BBB,J60/2,J300/2",
1005+
"AAA4:59BBB,J60/2,J300/2",
1006+
"AAA4BBB5:00,J60/2,J300/2",
1007+
"AAA4BBB5:59,J60/2,J300/2",
1008+
# Extreme offset seconds
1009+
"AAA4:00:00BBB,J60/2,J300/2",
1010+
"AAA4:00:59BBB,J60/2,J300/2",
1011+
"AAA4BBB5:00:00,J60/2,J300/2",
1012+
"AAA4BBB5:00:59,J60/2,J300/2",
1013+
# Extreme total offset
1014+
"AAA24:59:59BBB5,J60/2,J300/2",
1015+
"AAA-24:59:59BBB5,J60/2,J300/2",
1016+
"AAA4BBB24:59:59,J60/2,J300/2",
1017+
"AAA4BBB-24:59:59,J60/2,J300/2",
1018+
# Extreme months
1019+
"AAA4BBB,M12.1.1/2,M1.1.1/2",
1020+
"AAA4BBB,M1.1.1/2,M12.1.1/2",
1021+
# Extreme weeks
1022+
"AAA4BBB,M1.5.1/2,M1.1.1/2",
1023+
"AAA4BBB,M1.1.1/2,M1.5.1/2",
1024+
# Extreme weekday
1025+
"AAA4BBB,M1.1.6/2,M2.1.1/2",
1026+
"AAA4BBB,M1.1.1/2,M2.1.6/2",
1027+
# Extreme numeric offset
1028+
"AAA4BBB,0/2,20/2",
1029+
"AAA4BBB,0/2,0/14",
1030+
"AAA4BBB,20/2,365/2",
1031+
"AAA4BBB,365/2,365/14",
1032+
# Extreme julian offset
1033+
"AAA4BBB,J1/2,J20/2",
1034+
"AAA4BBB,J1/2,J1/14",
1035+
"AAA4BBB,J20/2,J365/2",
1036+
"AAA4BBB,J365/2,J365/14",
1037+
# Extreme transition hour
1038+
"AAA4BBB,J60/167,J300/2",
1039+
"AAA4BBB,J60/+167,J300/2",
1040+
"AAA4BBB,J60/-167,J300/2",
1041+
"AAA4BBB,J60/2,J300/167",
1042+
"AAA4BBB,J60/2,J300/+167",
1043+
"AAA4BBB,J60/2,J300/-167",
1044+
# Extreme transition minutes
1045+
"AAA4BBB,J60/2:00,J300/2",
1046+
"AAA4BBB,J60/2:59,J300/2",
1047+
"AAA4BBB,J60/2,J300/2:00",
1048+
"AAA4BBB,J60/2,J300/2:59",
1049+
# Extreme transition seconds
1050+
"AAA4BBB,J60/2:00:00,J300/2",
1051+
"AAA4BBB,J60/2:00:59,J300/2",
1052+
"AAA4BBB,J60/2,J300/2:00:00",
1053+
"AAA4BBB,J60/2,J300/2:00:59",
1054+
# Extreme total transition time
1055+
"AAA4BBB,J60/167:59:59,J300/2",
1056+
"AAA4BBB,J60/-167:59:59,J300/2",
1057+
"AAA4BBB,J60/2,J300/167:59:59",
1058+
"AAA4BBB,J60/2,J300/-167:59:59",
1059+
]
1060+
1061+
for tzstr in tzstrs:
1062+
with self.subTest(tzstr=tzstr):
1063+
self.zone_from_tzstr(tzstr)
1064+
9911065
def test_invalid_tzstr(self):
9921066
invalid_tzstrs = [
9931067
"PST8PDT", # DST but no transition specified
9941068
"+11", # Unquoted alphanumeric
9951069
"GMT,M3.2.0/2,M11.1.0/3", # Transition rule but no DST
9961070
"GMT0+11,M3.2.0/2,M11.1.0/3", # Unquoted alphanumeric in DST
9971071
"PST8PDT,M3.2.0/2", # Only one transition rule
998-
# Invalid offsets
999-
"STD+25",
1000-
"STD-25",
1001-
"STD+374",
1002-
"STD+374DST,M3.2.0/2,M11.1.0/3",
1003-
"STD+23DST+25,M3.2.0/2,M11.1.0/3",
1004-
"STD-23DST-25,M3.2.0/2,M11.1.0/3",
1072+
# Invalid offset hours
1073+
"AAA168",
1074+
"AAA+168",
1075+
"AAA-168",
1076+
"AAA168BBB,J60/2,J300/2",
1077+
"AAA+168BBB,J60/2,J300/2",
1078+
"AAA-168BBB,J60/2,J300/2",
1079+
"AAA4BBB168,J60/2,J300/2",
1080+
"AAA4BBB+168,J60/2,J300/2",
1081+
"AAA4BBB-168,J60/2,J300/2",
1082+
# Invalid offset minutes
1083+
"AAA4:0BBB,J60/2,J300/2",
1084+
"AAA4:100BBB,J60/2,J300/2",
1085+
"AAA4BBB5:0,J60/2,J300/2",
1086+
"AAA4BBB5:100,J60/2,J300/2",
1087+
# Invalid offset seconds
1088+
"AAA4:00:0BBB,J60/2,J300/2",
1089+
"AAA4:00:100BBB,J60/2,J300/2",
1090+
"AAA4BBB5:00:0,J60/2,J300/2",
1091+
"AAA4BBB5:00:100,J60/2,J300/2",
10051092
# Completely invalid dates
10061093
"AAA4BBB,M1443339,M11.1.0/3",
10071094
"AAA4BBB,M3.2.0/2,0349309483959c",
1095+
"AAA4BBB,,J300/2",
1096+
"AAA4BBB,z,J300/2",
1097+
"AAA4BBB,J60/2,",
1098+
"AAA4BBB,J60/2,z",
10081099
# Invalid months
10091100
"AAA4BBB,M13.1.1/2,M1.1.1/2",
10101101
"AAA4BBB,M1.1.1/2,M13.1.1/2",
@@ -1024,6 +1115,26 @@ def test_invalid_tzstr(self):
10241115
# Invalid julian offset
10251116
"AAA4BBB,J0/2,J20/2",
10261117
"AAA4BBB,J20/2,J366/2",
1118+
# Invalid transition time
1119+
"AAA4BBB,J60/2/3,J300/2",
1120+
"AAA4BBB,J60/2,J300/2/3",
1121+
# Invalid transition hour
1122+
"AAA4BBB,J60/168,J300/2",
1123+
"AAA4BBB,J60/+168,J300/2",
1124+
"AAA4BBB,J60/-168,J300/2",
1125+
"AAA4BBB,J60/2,J300/168",
1126+
"AAA4BBB,J60/2,J300/+168",
1127+
"AAA4BBB,J60/2,J300/-168",
1128+
# Invalid transition minutes
1129+
"AAA4BBB,J60/2:0,J300/2",
1130+
"AAA4BBB,J60/2:100,J300/2",
1131+
"AAA4BBB,J60/2,J300/2:0",
1132+
"AAA4BBB,J60/2,J300/2:100",
1133+
# Invalid transition seconds
1134+
"AAA4BBB,J60/2:00:0,J300/2",
1135+
"AAA4BBB,J60/2:00:100,J300/2",
1136+
"AAA4BBB,J60/2,J300/2:00:0",
1137+
"AAA4BBB,J60/2,J300/2:00:100",
10271138
]
10281139

10291140
for invalid_tzstr in invalid_tzstrs:

Lib/zoneinfo/_zoneinfo.py

Lines changed: 53 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -517,8 +517,8 @@ class _DayOffset:
517517
__slots__ = ["d", "julian", "hour", "minute", "second"]
518518

519519
def __init__(self, d, julian, hour=2, minute=0, second=0):
520-
if not (0 + julian) <= d <= 365:
521-
min_day = 0 + julian
520+
min_day = 0 + julian # convert bool to int
521+
if not min_day <= d <= 365:
522522
raise ValueError(f"d must be in [{min_day}, 365], not: {d}")
523523

524524
self.d = d
@@ -560,11 +560,11 @@ class _CalendarOffset:
560560
)
561561

562562
def __init__(self, m, w, d, hour=2, minute=0, second=0):
563-
if not 0 < m <= 12:
564-
raise ValueError("m must be in (0, 12]")
563+
if not 1 <= m <= 12:
564+
raise ValueError("m must be in [1, 12]")
565565

566-
if not 0 < w <= 5:
567-
raise ValueError("w must be in (0, 5]")
566+
if not 1 <= w <= 5:
567+
raise ValueError("w must be in [1, 5]")
568568

569569
if not 0 <= d <= 6:
570570
raise ValueError("d must be in [0, 6]")
@@ -634,18 +634,21 @@ def _parse_tz_str(tz_str):
634634

635635
offset_str, *start_end_str = tz_str.split(",", 1)
636636

637-
# fmt: off
638637
parser_re = re.compile(
639-
r"(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" +
640-
r"((?P<stdoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?)" +
641-
r"((?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+\-]+>)" +
642-
r"((?P<dstoff>[+-]?\d{1,2}(:\d{2}(:\d{2})?)?))?" +
643-
r")?" + # dst
644-
r")?$" # stdoff
638+
r"""
639+
(?P<std>[^<0-9:.+-]+|<[a-zA-Z0-9+-]+>)
640+
(?:
641+
(?P<stdoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)
642+
(?:
643+
(?P<dst>[^0-9:.+-]+|<[a-zA-Z0-9+-]+>)
644+
(?P<dstoff>[+-]?\d{1,3}(?::\d{2}(?::\d{2})?)?)?
645+
)? # dst
646+
)? # stdoff
647+
""",
648+
re.ASCII|re.VERBOSE
645649
)
646-
# fmt: on
647650

648-
m = parser_re.match(offset_str)
651+
m = parser_re.fullmatch(offset_str)
649652

650653
if m is None:
651654
raise ValueError(f"{tz_str} is not a valid TZ string")
@@ -696,16 +699,17 @@ def _parse_tz_str(tz_str):
696699

697700

698701
def _parse_dst_start_end(dststr):
699-
date, *time = dststr.split("/")
700-
if date[0] == "M":
702+
date, *time = dststr.split("/", 1)
703+
type = date[:1]
704+
if type == "M":
701705
n_is_julian = False
702-
m = re.match(r"M(\d{1,2})\.(\d).(\d)$", date)
706+
m = re.fullmatch(r"M(\d{1,2})\.(\d).(\d)", date, re.ASCII)
703707
if m is None:
704708
raise ValueError(f"Invalid dst start/end date: {dststr}")
705709
date_offset = tuple(map(int, m.groups()))
706710
offset = _CalendarOffset(*date_offset)
707711
else:
708-
if date[0] == "J":
712+
if type == "J":
709713
n_is_julian = True
710714
date = date[1:]
711715
else:
@@ -715,38 +719,54 @@ def _parse_dst_start_end(dststr):
715719
offset = _DayOffset(doy, n_is_julian)
716720

717721
if time:
718-
time_components = list(map(int, time[0].split(":")))
719-
n_components = len(time_components)
720-
if n_components < 3:
721-
time_components.extend([0] * (3 - n_components))
722-
offset.hour, offset.minute, offset.second = time_components
722+
offset.hour, offset.minute, offset.second = _parse_transition_time(time[0])
723723

724724
return offset
725725

726726

727+
def _parse_transition_time(time_str):
728+
match = re.fullmatch(
729+
r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
730+
time_str,
731+
re.ASCII
732+
)
733+
if match is None:
734+
raise ValueError(f"Invalid time: {time_str}")
735+
736+
h, m, s = (int(v or 0) for v in match.group("h", "m", "s"))
737+
738+
if h > 167:
739+
raise ValueError(
740+
f"Hour must be in [0, 167]: {time_str}"
741+
)
742+
743+
if match.group("sign") == "-":
744+
h, m, s = -h, -m, -s
745+
746+
return h, m, s
747+
748+
727749
def _parse_tz_delta(tz_delta):
728-
match = re.match(
729-
r"(?P<sign>[+-])?(?P<h>\d{1,2})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
750+
match = re.fullmatch(
751+
r"(?P<sign>[+-])?(?P<h>\d{1,3})(:(?P<m>\d{2})(:(?P<s>\d{2}))?)?",
730752
tz_delta,
753+
re.ASCII
731754
)
732755
# Anything passed to this function should already have hit an equivalent
733756
# regular expression to find the section to parse.
734757
assert match is not None, tz_delta
735758

736-
h, m, s = (
737-
int(v) if v is not None else 0
738-
for v in map(match.group, ("h", "m", "s"))
739-
)
759+
h, m, s = (int(v or 0) for v in match.group("h", "m", "s"))
740760

741761
total = h * 3600 + m * 60 + s
742762

743-
if not -86400 < total < 86400:
763+
if h > 24:
744764
raise ValueError(
745-
f"Offset must be strictly between -24h and +24h: {tz_delta}"
765+
f"Offset hours must be in [0, 24]: {tz_delta}"
746766
)
747767

748768
# Yes, +5 maps to an offset of -5h
749769
if match.group("sign") != "-":
750-
total *= -1
770+
total = -total
751771

752772
return total
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
:mod:`zoneinfo` now supports the full range of values in the TZ string
2+
determined by RFC 8536 and detects all invalid formats.
3+
Both Python and C implementations now raise exceptions of the same
4+
type on invalid data.

0 commit comments

Comments
 (0)