Skip to content

Commit 06a9c76

Browse files
committed
PERF: Cython version of Python _TIMEPAT regexp in parsing.pyx (pandas-dev#26204)
1 parent 94535a3 commit 06a9c76

File tree

2 files changed

+38
-6
lines changed

2 files changed

+38
-6
lines changed

asv_bench/benchmarks/io/csv.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,11 +234,12 @@ def mem_parser_chunks(self):
234234

235235

236236
class ReadCSVParseSpecialDate(StringIORewind):
237-
params = (['mY', 'mdY'],)
237+
params = (['mY', 'mdY', 'hm'],)
238238
params_name = ['value']
239239
objects = {
240240
'mY': '01-2019\n10-2019\n02/2000\n',
241-
'mdY': '12/02/2010\n'
241+
'mdY': '12/02/2010\n',
242+
'hm': '21:34\n'
242243
}
243244

244245
def setup(self, value):

pandas/_libs/tslibs/parsing.pyx

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,8 @@ class DateParseError(ValueError):
7070
_DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0,
7171
second=0, microsecond=0)
7272

73-
cdef object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])')
74-
75-
cdef set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}
73+
cdef:
74+
set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}
7675

7776
# ----------------------------------------------------------------------
7877
cdef:
@@ -170,6 +169,38 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst):
170169
raise DateParseError("Invalid date specified ({}/{})".format(month, day))
171170

172171

172+
cdef inline bint does_string_look_like_time(object parse_string):
173+
"""
174+
Check whether the given string looks like a time: it must start with
175+
either H:MM or HH:MM, and the hour and minute values must be valid.
176+
177+
Parameters
178+
----------
179+
parse_string : str
180+
181+
Returns
182+
-------
183+
bool : True if the string starts with a valid H:MM or HH:MM time
184+
"""
185+
cdef:
186+
const char* buf
187+
Py_ssize_t length
188+
int hour = -1, minute = -1
189+
190+
buf = get_c_string_buf_and_size(parse_string, &length)
191+
if length >= 4:
192+
if buf[1] == b':':
193+
# h:MM format
194+
hour = getdigit_ascii(buf[0], -1)
195+
minute = _parse_2digit(buf + 2)
196+
elif buf[2] == b':':
197+
# HH:MM format
198+
hour = _parse_2digit(buf)
199+
minute = _parse_2digit(buf + 3)
200+
201+
return 0 <= hour <= 23 and 0 <= minute <= 59
202+
203+
173204
def parse_datetime_string(date_string, freq=None, dayfirst=False,
174205
yearfirst=False, **kwargs):
175206
"""parse datetime string, only returns datetime.
@@ -186,7 +217,7 @@ def parse_datetime_string(date_string, freq=None, dayfirst=False,
186217
if not _does_string_look_like_datetime(date_string):
187218
raise ValueError('Given date string not likely a datetime.')
188219

189-
if _TIMEPAT.match(date_string):
220+
if does_string_look_like_time(date_string):
190221
# use current datetime as default, not pass _DEFAULT_DATETIME
191222
dt = du_parse(date_string, dayfirst=dayfirst,
192223
yearfirst=yearfirst, **kwargs)

0 commit comments

Comments
 (0)