Skip to content

Commit d545d3a

Browse files
committed
implemented _does_string_look_like_datetime in cython
1 parent e66883e commit d545d3a

File tree

3 files changed

+47
-70
lines changed

3 files changed

+47
-70
lines changed

pandas/_libs/tslibs/parsing.pyx

+44-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import re
77
import time
88

99
from cpython.datetime cimport datetime
10+
from cpython cimport PyUnicode_Check, PyBytes_Check, PyBytes_AsStringAndSize
1011

1112

1213
import numpy as np
@@ -33,7 +34,23 @@ from dateutil.parser import parse as du_parse
3334
from pandas._libs.tslibs.ccalendar import MONTH_NUMBERS
3435
from pandas._libs.tslibs.nattype import nat_strings, NaT
3536

36-
from pandas._libs.datehelpers import _does_string_look_like_datetime
37+
cdef extern from "errno.h":
38+
int errno
39+
40+
cdef extern from "../src/parser/tokenizer.h":
41+
double xstrtod(const char *p, char **q, char decimal, char sci, char tsep,
42+
int skip_trailing)
43+
44+
cdef extern from *:
45+
char* PyUnicode_AsUTF8AndSize(object unicode, Py_ssize_t* length)
46+
47+
cdef inline bint get_string_data(object s, char **buf, Py_ssize_t *length):
    # Expose the character buffer and size of a str or bytes object.
    #
    # On success ``buf[0]`` points at the object's data and ``length[0]``
    # holds its size, and True is returned.  False is returned when ``s`` is
    # neither str nor bytes, or when the underlying C-API call reports
    # failure.
    if PyBytes_Check(s):
        # PyBytes_AsStringAndSize signals success with 0
        return PyBytes_AsStringAndSize(s, buf, length) == 0
    if not PyUnicode_Check(s):
        return False
    # For str the buffer is the UTF-8 form, so ``length`` counts bytes
    buf[0] = PyUnicode_AsUTF8AndSize(s, length)
    return buf[0] != NULL
3754

3855
# ----------------------------------------------------------------------
3956
# Constants
@@ -49,6 +66,8 @@ _DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0,
4966
cdef:
5067
object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])')
5168

69+
set _not_datelike_strings = {'a', 'A', 'm', 'M', 'p', 'P', 't', 'T'}
70+
5271
# ----------------------------------------------------------------------
5372

5473
_get_option = None
@@ -187,6 +206,30 @@ cdef parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False,
187206
return parsed, parsed, reso
188207

189208

209+
cpdef bint _does_string_look_like_datetime(object date_string):
    """
    Heuristically decide whether ``date_string`` could be a datetime.

    Parameters
    ----------
    date_string : str or bytes
        Any other type is reported as not datetime-like.

    Returns
    -------
    bint
        False when ``date_string`` is not str/bytes, when it is exactly one
        of the letters a, m, p, t (either case), or when the whole string
        parses as a number smaller than 1000.  True otherwise, including for
        the empty string and for any string starting with '0'.
    """
    cdef:
        char *buf = NULL
        char *endptr = NULL
        Py_ssize_t length = -1
        double converted_date
        char first

    if not get_string_data(date_string, &buf, &length):
        return False
    if length >= 1:
        first = buf[0]
        if first == '0':
            # A leading zero is accepted as date-like without parsing
            return True
        elif length == 1 and first in b'aAmMpPtT':
            # Test the raw byte rather than the Python object: a bytes value
            # never compares equal to the str members of
            # _not_datelike_strings on Python 3, so bytes input would
            # otherwise slip past this filter.
            return False
        else:
            converted_date = xstrtod(buf, &endptr, '.', 'e', '\0', 1)
            # NOTE(review): this relies on errno being 0 after a successful
            # xstrtod call -- confirm xstrtod resets errno on entry.
            # Only a string consumed in full counts as a plain number.
            if errno == 0 and endptr == buf + length:
                return converted_date >= 1000

    return True
231+
232+
190233
cdef inline object _parse_dateabbr_string(object date_string, object default,
191234
object freq):
192235
cdef:

pandas/_libs/tslibs/src/datetime/datehelpers.c

-68
Original file line numberDiff line numberDiff line change
@@ -322,75 +322,13 @@ static PyObject* concat_date_cols(PyObject *self, PyObject *args,
322322
}
323323
}
324324

325-
static char not_datelike[sizeof(char) * 256];
326-
327-
static PyObject* _does_string_look_like_datetime(PyObject* unused,
328-
PyObject* arg) {
329-
char *buf = NULL, *endptr = NULL;
330-
Py_ssize_t length = -1;
331-
double converted_date;
332-
int error = 0;
333-
int result = 1;
334-
335-
#if PY_MAJOR_VERSION == 2
336-
if (!PyString_CheckExact(arg)) {
337-
if (!PyUnicode_CheckExact(arg)) {
338-
// arg is not a string, so it's certainly
339-
// not a datetime-looking string
340-
PyErr_SetString(PyExc_ValueError,
341-
"_does_string_look_like_datetime expects a string");
342-
return NULL;
343-
}
344-
buf = PyUnicode_AS_DATA(arg);
345-
length = (int)PyUnicode_GET_SIZE(arg);
346-
} else {
347-
if (PyString_AsStringAndSize(arg, &buf, &length) == -1) {
348-
return NULL;
349-
}
350-
}
351-
#else
352-
if (!PyUnicode_CheckExact(arg) || !PyUnicode_IS_READY(arg)) {
353-
PyErr_SetString(PyExc_ValueError,
354-
"_does_string_look_like_datetime expects a string");
355-
return NULL;
356-
}
357-
buf = PyUnicode_DATA(arg);
358-
length = PyUnicode_GET_LENGTH(arg);
359-
#endif
360-
361-
if (length >= 1) {
362-
char first = *buf;
363-
if (first == '0') {
364-
result = 1;
365-
} else if (length == 1 && not_datelike[Py_CHARMASK(first)]) {
366-
result = 0;
367-
} else {
368-
converted_date = xstrtod(buf, &endptr, '.', 'e', '\0', 1);
369-
if ((errno == 0) && (endptr == buf + length)) {
370-
result = (converted_date >= 1000) ? 1 : 0;
371-
}
372-
}
373-
}
374-
375-
if (result) {
376-
Py_RETURN_TRUE;
377-
} else {
378-
Py_RETURN_FALSE;
379-
}
380-
}
381-
382325
static PyMethodDef module_methods[] = {
383326
/* name from python, name in C-file, ..., __doc__ string of method */
384327
{
385328
"concat_date_cols", (PyCFunction)concat_date_cols,
386329
METH_VARARGS | METH_KEYWORDS,
387330
"concatenates date cols and returns numpy array"
388331
},
389-
{
390-
"_does_string_look_like_datetime", _does_string_look_like_datetime,
391-
METH_O,
392-
"checks if string looks like a datetime"
393-
},
394332
{NULL, NULL, 0, NULL}
395333
};
396334

@@ -418,11 +356,5 @@ PY_DATEHELPERS_MODULE_INIT {
418356

419357
module = PY_MODULE_CREATE;
420358

421-
memset(not_datelike, 0, sizeof(not_datelike));
422-
not_datelike['a'] = not_datelike['A'] = 1;
423-
not_datelike['m'] = not_datelike['M'] = 1;
424-
not_datelike['p'] = not_datelike['P'] = 1;
425-
not_datelike['t'] = not_datelike['T'] = 1;
426-
427359
PY_RETURN_MODULE;
428360
}

setup.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -611,7 +611,9 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
611611
'sources': np_datetime_sources},
612612
'_libs.tslibs.parsing': {
613613
'pyxfile': '_libs/tslibs/parsing',
614-
'include': []},
614+
'include': common_include,
615+
'depends': ['pandas/_libs/src/parser/tokenizer.h'],
616+
'sources': ['pandas/_libs/src/parser/tokenizer.c']},
615617
'_libs.tslibs.period': {
616618
'pyxfile': '_libs/tslibs/period',
617619
'include': ts_include,

0 commit comments

Comments
 (0)