Skip to content

Commit b62e9ae

Browse files
anmyachev authored and jreback committed
PERF: Cython version of Python _TIMEPAT regexp in parsing.pyx (#26204)
1 parent d41c1da commit b62e9ae

File tree

2 files changed

+36
-5
lines changed

2 files changed

+36
-5
lines changed

asv_bench/benchmarks/io/csv.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -252,11 +252,12 @@ def mem_parser_chunks(self):
252252

253253

254254
class ReadCSVParseSpecialDate(StringIORewind):
255-
params = (['mY', 'mdY'],)
255+
params = (['mY', 'mdY', 'hm'],)
256256
params_name = ['value']
257257
objects = {
258258
'mY': '01-2019\n10-2019\n02/2000\n',
259-
'mdY': '12/02/2010\n'
259+
'mdY': '12/02/2010\n',
260+
'hm': '21:34\n'
260261
}
261262

262263
def setup(self, value):

pandas/_libs/tslibs/parsing.pyx

+33-3
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,6 @@ _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0,
4444
second=0, microsecond=0)
4545

4646
cdef:
47-
object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])')
48-
4947
set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}
5048

5149
# ----------------------------------------------------------------------
@@ -144,6 +142,38 @@ cdef inline object _parse_delimited_date(object date_string, bint dayfirst):
144142
raise DateParseError("Invalid date specified ({}/{})".format(month, day))
145143

146144

145+
cdef inline bint does_string_look_like_time(object parse_string):
146+
"""
147+
Checks whether given string is a time: it has to start with either
148+
H:MM or HH:MM, and hour and minute values must be valid.
149+
150+
Parameters
151+
----------
152+
parse_string : str
153+
154+
Returns
155+
-------
156+
bool indicating whether the given string is a time
157+
"""
158+
cdef:
159+
const char* buf
160+
Py_ssize_t length
161+
int hour = -1, minute = -1
162+
163+
buf = get_c_string_buf_and_size(parse_string, &length)
164+
if length >= 4:  # shortest accepted form is H:MM (4 characters)
165+
if buf[1] == b':':
166+
# H:MM format
167+
hour = getdigit_ascii(buf[0], -1)
168+
minute = _parse_2digit(buf + 2)
169+
elif buf[2] == b':':
170+
# HH:MM format
171+
hour = _parse_2digit(buf)
172+
minute = _parse_2digit(buf + 3)
173+
174+
return 0 <= hour <= 23 and 0 <= minute <= 59  # hour/minute stay -1 (-> False) if neither ':' position matched
175+
176+
147177
def parse_datetime_string(date_string, freq=None, dayfirst=False,
148178
yearfirst=False, **kwargs):
149179
"""parse datetime string, only returns datetime.
@@ -160,7 +190,7 @@ def parse_datetime_string(date_string, freq=None, dayfirst=False,
160190
if not _does_string_look_like_datetime(date_string):
161191
raise ValueError('Given date string not likely a datetime.')
162192

163-
if _TIMEPAT.match(date_string):
193+
if does_string_look_like_time(date_string):
164194
# use current datetime as default, not pass _DEFAULT_DATETIME
165195
dt = du_parse(date_string, dayfirst=dayfirst,
166196
yearfirst=yearfirst, **kwargs)

0 commit comments

Comments
 (0)