Skip to content

Commit bceeeeb

Browse files
jbrockmendel authored and No-Stream committed
Implement npy_dtime.pyx (pandas-dev#17805)
1 parent b945703 commit bceeeeb

File tree

5 files changed: 138 additions (+138) and 55 deletions (-55)

pandas/_libs/tslib.pyx

+18-28
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,6 @@ from datetime cimport (
4747
npy_datetime,
4848
is_leapyear,
4949
dayofweek,
50-
check_dts_bounds,
5150
PANDAS_FR_ns,
5251
PyDateTime_Check, PyDate_Check,
5352
PyDateTime_IMPORT,
@@ -58,6 +57,9 @@ from datetime cimport (
5857
from datetime import timedelta, datetime
5958
from datetime import time as datetime_time
6059

60+
from tslibs.np_datetime cimport check_dts_bounds
61+
from tslibs.np_datetime import OutOfBoundsDatetime
62+
6163
from khash cimport (
6264
khiter_t,
6365
kh_destroy_int64, kh_put_int64,
@@ -732,7 +734,7 @@ class Timestamp(_Timestamp):
732734
ts = convert_datetime_to_tsobject(ts_input, _tzinfo)
733735
value = ts.value + (dts.ps // 1000)
734736
if value != NPY_NAT:
735-
_check_dts_bounds(&dts)
737+
check_dts_bounds(&dts)
736738

737739
return create_timestamp_from_ts(value, dts, _tzinfo, self.freq)
738740

@@ -1645,7 +1647,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit,
16451647
'Timestamp'.format(ts, type(ts)))
16461648

16471649
if obj.value != NPY_NAT:
1648-
_check_dts_bounds(&obj.dts)
1650+
check_dts_bounds(&obj.dts)
16491651

16501652
if tz is not None:
16511653
_localize_tso(obj, tz)
@@ -1726,7 +1728,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
17261728
obj.value += nanos
17271729
obj.dts.ps = nanos * 1000
17281730

1729-
_check_dts_bounds(&obj.dts)
1731+
check_dts_bounds(&obj.dts)
17301732
return obj
17311733

17321734

@@ -1762,12 +1764,12 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit,
17621764
_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
17631765
obj.value = pandas_datetimestruct_to_datetime(
17641766
PANDAS_FR_ns, &obj.dts)
1765-
_check_dts_bounds(&obj.dts)
1767+
check_dts_bounds(&obj.dts)
17661768
if out_local == 1:
17671769
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
17681770
obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC')
17691771
if tz is None:
1770-
_check_dts_bounds(&obj.dts)
1772+
check_dts_bounds(&obj.dts)
17711773
return obj
17721774
else:
17731775
# Keep the converter same as PyDateTime's
@@ -1810,7 +1812,7 @@ def _test_parse_iso8601(object ts):
18101812

18111813
_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
18121814
obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &obj.dts)
1813-
_check_dts_bounds(&obj.dts)
1815+
check_dts_bounds(&obj.dts)
18141816
if out_local == 1:
18151817
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
18161818
obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC')
@@ -1897,18 +1899,6 @@ cpdef inline object _localize_pydatetime(object dt, object tz):
18971899
return dt.replace(tzinfo=tz)
18981900

18991901

1900-
class OutOfBoundsDatetime(ValueError):
1901-
pass
1902-
1903-
cdef inline _check_dts_bounds(pandas_datetimestruct *dts):
1904-
if check_dts_bounds(dts):
1905-
fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month,
1906-
dts.day, dts.hour,
1907-
dts.min, dts.sec)
1908-
raise OutOfBoundsDatetime(
1909-
'Out of bounds nanosecond timestamp: %s' % fmt)
1910-
1911-
19121902
def datetime_to_datetime64(ndarray[object] values):
19131903
cdef:
19141904
Py_ssize_t i, n = len(values)
@@ -1933,13 +1923,13 @@ def datetime_to_datetime64(ndarray[object] values):
19331923

19341924
_ts = convert_datetime_to_tsobject(val, None)
19351925
iresult[i] = _ts.value
1936-
_check_dts_bounds(&_ts.dts)
1926+
check_dts_bounds(&_ts.dts)
19371927
else:
19381928
if inferred_tz is not None:
19391929
raise ValueError('Cannot mix tz-aware with '
19401930
'tz-naive values')
19411931
iresult[i] = _pydatetime_to_dts(val, &dts)
1942-
_check_dts_bounds(&dts)
1932+
check_dts_bounds(&dts)
19431933
else:
19441934
raise TypeError('Unrecognized value type: %s' % type(val))
19451935

@@ -2252,7 +2242,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
22522242
_ts = convert_datetime_to_tsobject(val, None)
22532243
iresult[i] = _ts.value
22542244
try:
2255-
_check_dts_bounds(&_ts.dts)
2245+
check_dts_bounds(&_ts.dts)
22562246
except ValueError:
22572247
if is_coerce:
22582248
iresult[i] = NPY_NAT
@@ -2267,7 +2257,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
22672257
if is_timestamp(val):
22682258
iresult[i] += val.nanosecond
22692259
try:
2270-
_check_dts_bounds(&dts)
2260+
check_dts_bounds(&dts)
22712261
except ValueError:
22722262
if is_coerce:
22732263
iresult[i] = NPY_NAT
@@ -2277,7 +2267,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
22772267
elif PyDate_Check(val):
22782268
iresult[i] = _date_to_datetime64(val, &dts)
22792269
try:
2280-
_check_dts_bounds(&dts)
2270+
check_dts_bounds(&dts)
22812271
seen_datetime = 1
22822272
except ValueError:
22832273
if is_coerce:
@@ -2334,7 +2324,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
23342324
tz = pytz.FixedOffset(out_tzoffset)
23352325
value = tz_convert_single(value, tz, 'UTC')
23362326
iresult[i] = value
2337-
_check_dts_bounds(&dts)
2327+
check_dts_bounds(&dts)
23382328
except ValueError:
23392329
# if requiring iso8601 strings, skip trying other formats
23402330
if require_iso8601:
@@ -2433,7 +2423,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
24332423
oresult[i] = parse_datetime_string(val, dayfirst=dayfirst,
24342424
yearfirst=yearfirst)
24352425
_pydatetime_to_dts(oresult[i], &dts)
2436-
_check_dts_bounds(&dts)
2426+
check_dts_bounds(&dts)
24372427
except Exception:
24382428
if is_raise:
24392429
raise
@@ -3239,7 +3229,7 @@ cdef inline _get_datetime64_nanos(object val):
32393229

32403230
if unit != PANDAS_FR_ns:
32413231
pandas_datetime_to_datetimestruct(ival, unit, &dts)
3242-
_check_dts_bounds(&dts)
3232+
check_dts_bounds(&dts)
32433233
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
32443234
else:
32453235
return ival
@@ -3267,7 +3257,7 @@ def cast_to_nanoseconds(ndarray arr):
32673257
if ivalues[i] != NPY_NAT:
32683258
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
32693259
iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
3270-
_check_dts_bounds(&dts)
3260+
check_dts_bounds(&dts)
32713261
else:
32723262
iresult[i] = NPY_NAT
32733263

pandas/_libs/tslibs/np_datetime.pxd

+16
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
1+
# -*- coding: utf-8 -*-
2+
# cython: profile=False
3+
4+
from numpy cimport int64_t, int32_t
5+
6+
7+
cdef extern from "../src/datetime/np_datetime.h":
8+
ctypedef struct pandas_datetimestruct:
9+
int64_t year
10+
int32_t month, day, hour, min, sec, us, ps, as
11+
12+
13+
cdef check_dts_bounds(pandas_datetimestruct *dts)
14+
15+
cdef int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil
16+
cdef void dt64_to_dtstruct(int64_t dt64, pandas_datetimestruct* out) nogil

pandas/_libs/tslibs/np_datetime.pyx

+82
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,82 @@
1+
# -*- coding: utf-8 -*-
2+
# cython: profile=False
3+
4+
from numpy cimport int64_t
5+
6+
cdef extern from "numpy/ndarrayobject.h":
7+
ctypedef int64_t npy_timedelta
8+
ctypedef int64_t npy_datetime
9+
10+
cdef extern from "../src/datetime/np_datetime.h":
11+
ctypedef enum PANDAS_DATETIMEUNIT:
12+
PANDAS_FR_Y
13+
PANDAS_FR_M
14+
PANDAS_FR_W
15+
PANDAS_FR_D
16+
PANDAS_FR_B
17+
PANDAS_FR_h
18+
PANDAS_FR_m
19+
PANDAS_FR_s
20+
PANDAS_FR_ms
21+
PANDAS_FR_us
22+
PANDAS_FR_ns
23+
PANDAS_FR_ps
24+
PANDAS_FR_fs
25+
PANDAS_FR_as
26+
27+
int cmp_pandas_datetimestruct(pandas_datetimestruct *a,
28+
pandas_datetimestruct *b)
29+
30+
npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr,
31+
pandas_datetimestruct *d
32+
) nogil
33+
34+
void pandas_datetime_to_datetimestruct(npy_datetime val,
35+
PANDAS_DATETIMEUNIT fr,
36+
pandas_datetimestruct *result) nogil
37+
38+
pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS
39+
40+
# ----------------------------------------------------------------------
41+
42+
43+
class OutOfBoundsDatetime(ValueError):
44+
pass
45+
46+
47+
cdef inline check_dts_bounds(pandas_datetimestruct *dts):
48+
"""Raises OutOfBoundsDatetime if the given date is outside the range that
49+
can be represented by nanosecond-resolution 64-bit integers."""
50+
cdef:
51+
bint error = False
52+
53+
if (dts.year <= 1677 and
54+
cmp_pandas_datetimestruct(dts, &_NS_MIN_DTS) == -1):
55+
error = True
56+
elif (dts.year >= 2262 and
57+
cmp_pandas_datetimestruct(dts, &_NS_MAX_DTS) == 1):
58+
error = True
59+
60+
if error:
61+
fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month,
62+
dts.day, dts.hour,
63+
dts.min, dts.sec)
64+
raise OutOfBoundsDatetime(
65+
'Out of bounds nanosecond timestamp: {fmt}'.format(fmt=fmt))
66+
67+
68+
# ----------------------------------------------------------------------
69+
# Conversion
70+
71+
cdef inline int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil:
72+
"""Convenience function to call pandas_datetimestruct_to_datetime
73+
with the by-far-most-common frequency PANDAS_FR_ns"""
74+
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts)
75+
76+
77+
cdef inline void dt64_to_dtstruct(int64_t dt64,
78+
pandas_datetimestruct* out) nogil:
79+
"""Convenience function to call pandas_datetime_to_datetimestruct
80+
with the by-far-most-common frequency PANDAS_FR_ns"""
81+
pandas_datetime_to_datetimestruct(dt64, PANDAS_FR_ns, out)
82+
return

pandas/_libs/tslibs/strptime.pyx

+7-15
Original file line number | Diff line number | Diff line change
@@ -33,12 +33,8 @@ from numpy cimport ndarray, int64_t
3333
from datetime import date as datetime_date
3434
from datetime cimport datetime
3535

36-
# This is src/datetime.pxd
37-
from datetime cimport (
38-
PANDAS_FR_ns,
39-
check_dts_bounds,
40-
pandas_datetimestruct,
41-
pandas_datetimestruct_to_datetime)
36+
from np_datetime cimport (check_dts_bounds,
37+
dtstruct_to_dt64, pandas_datetimestruct)
4238

4339
from util cimport is_string_object, get_nat
4440

@@ -333,18 +329,14 @@ def array_strptime(ndarray[object] values, object fmt,
333329
dts.us = us
334330
dts.ps = ns * 1000
335331

336-
iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
337-
if check_dts_bounds(&dts):
332+
iresult[i] = dtstruct_to_dt64(&dts)
333+
try:
334+
check_dts_bounds(&dts)
335+
except ValueError:
338336
if is_coerce:
339337
iresult[i] = NPY_NAT
340338
continue
341-
else:
342-
from pandas._libs.tslib import OutOfBoundsDatetime
343-
fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month,
344-
dts.day, dts.hour,
345-
dts.min, dts.sec)
346-
raise OutOfBoundsDatetime(
347-
'Out of bounds nanosecond timestamp: %s' % fmt)
339+
raise
348340

349341
return result
350342

setup.py

+15-12
Original file line number | Diff line number | Diff line change
@@ -344,6 +344,7 @@ class CheckSDist(sdist_class):
344344
'pandas/_libs/sparse.pyx',
345345
'pandas/_libs/parsers.pyx',
346346
'pandas/_libs/tslibs/strptime.pyx',
347+
'pandas/_libs/tslibs/np_datetime.pyx',
347348
'pandas/_libs/tslibs/timedeltas.pyx',
348349
'pandas/_libs/tslibs/timezones.pyx',
349350
'pandas/_libs/tslibs/fields.pyx',
@@ -469,12 +470,11 @@ def pxd(name):
469470
'pandas/_libs/src/parse_helper.h',
470471
'pandas/_libs/src/compat_helper.h']
471472

472-
473-
tseries_depends = ['pandas/_libs/src/datetime/np_datetime.h',
474-
'pandas/_libs/src/datetime/np_datetime_strings.h',
475-
'pandas/_libs/src/datetime.pxd']
476-
npdt_srces = ['pandas/_libs/src/datetime/np_datetime.c',
477-
'pandas/_libs/src/datetime/np_datetime_strings.c']
473+
np_datetime_headers = ['pandas/_libs/src/datetime/np_datetime.h',
474+
'pandas/_libs/src/datetime/np_datetime_strings.h']
475+
np_datetime_sources = ['pandas/_libs/src/datetime/np_datetime.c',
476+
'pandas/_libs/src/datetime/np_datetime_strings.c']
477+
tseries_depends = np_datetime_headers + ['pandas/_libs/src/datetime.pxd']
478478

479479
# some linux distros require it
480480
libraries = ['m'] if not is_platform_windows() else []
@@ -489,28 +489,31 @@ def pxd(name):
489489
_pxi_dep['hashtable'])},
490490
'_libs.tslibs.strptime': {'pyxfile': '_libs/tslibs/strptime',
491491
'depends': tseries_depends,
492-
'sources': npdt_srces},
492+
'sources': np_datetime_sources},
493493
'_libs.tslibs.offsets': {'pyxfile': '_libs/tslibs/offsets'},
494494
'_libs.tslib': {'pyxfile': '_libs/tslib',
495495
'pxdfiles': ['_libs/src/util', '_libs/lib'],
496496
'depends': tseries_depends,
497-
'sources': npdt_srces},
497+
'sources': np_datetime_sources},
498+
'_libs.tslibs.np_datetime': {'pyxfile': '_libs/tslibs/np_datetime',
499+
'depends': np_datetime_headers,
500+
'sources': np_datetime_sources},
498501
'_libs.tslibs.timedeltas': {'pyxfile': '_libs/tslibs/timedeltas'},
499502
'_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'},
500503
'_libs.tslibs.fields': {'pyxfile': '_libs/tslibs/fields',
501504
'depends': tseries_depends,
502-
'sources': npdt_srces},
505+
'sources': np_datetime_sources},
503506
'_libs.period': {'pyxfile': '_libs/period',
504507
'depends': (tseries_depends +
505508
['pandas/_libs/src/period_helper.h']),
506-
'sources': npdt_srces + [
509+
'sources': np_datetime_sources + [
507510
'pandas/_libs/src/period_helper.c']},
508511
'_libs.tslibs.parsing': {'pyxfile': '_libs/tslibs/parsing',
509512
'pxdfiles': ['_libs/src/util']},
510513
'_libs.tslibs.frequencies': {'pyxfile': '_libs/tslibs/frequencies',
511514
'pxdfiles': ['_libs/src/util']},
512515
'_libs.index': {'pyxfile': '_libs/index',
513-
'sources': npdt_srces,
516+
'sources': np_datetime_sources,
514517
'pxdfiles': ['_libs/src/util', '_libs/hashtable'],
515518
'depends': _pxi_dep['index']},
516519
'_libs.algos': {'pyxfile': '_libs/algos',
@@ -623,7 +626,7 @@ def pxd(name):
623626
'pandas/_libs/src/ujson/python/JSONtoObj.c',
624627
'pandas/_libs/src/ujson/lib/ultrajsonenc.c',
625628
'pandas/_libs/src/ujson/lib/ultrajsondec.c'] +
626-
npdt_srces),
629+
np_datetime_sources),
627630
include_dirs=(['pandas/_libs/src/ujson/python',
628631
'pandas/_libs/src/ujson/lib',
629632
'pandas/_libs/src/datetime'] +

0 commit comments

Comments
 (0)