Skip to content

Implement npy_dtime.pyx #17805

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Oct 29, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 18 additions & 28 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ from datetime cimport (
npy_datetime,
is_leapyear,
dayofweek,
check_dts_bounds,
PANDAS_FR_ns,
PyDateTime_Check, PyDate_Check,
PyDateTime_IMPORT,
Expand All @@ -58,6 +57,9 @@ from datetime cimport (
from datetime import timedelta, datetime
from datetime import time as datetime_time

from tslibs.np_datetime cimport check_dts_bounds
from tslibs.np_datetime import OutOfBoundsDatetime

from khash cimport (
khiter_t,
kh_destroy_int64, kh_put_int64,
Expand Down Expand Up @@ -732,7 +734,7 @@ class Timestamp(_Timestamp):
ts = convert_datetime_to_tsobject(ts_input, _tzinfo)
value = ts.value + (dts.ps // 1000)
if value != NPY_NAT:
_check_dts_bounds(&dts)
check_dts_bounds(&dts)

return create_timestamp_from_ts(value, dts, _tzinfo, self.freq)

Expand Down Expand Up @@ -1645,7 +1647,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit,
'Timestamp'.format(ts, type(ts)))

if obj.value != NPY_NAT:
_check_dts_bounds(&obj.dts)
check_dts_bounds(&obj.dts)

if tz is not None:
_localize_tso(obj, tz)
Expand Down Expand Up @@ -1726,7 +1728,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
obj.value += nanos
obj.dts.ps = nanos * 1000

_check_dts_bounds(&obj.dts)
check_dts_bounds(&obj.dts)
return obj


Expand Down Expand Up @@ -1762,12 +1764,12 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit,
_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
obj.value = pandas_datetimestruct_to_datetime(
PANDAS_FR_ns, &obj.dts)
_check_dts_bounds(&obj.dts)
check_dts_bounds(&obj.dts)
if out_local == 1:
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC')
if tz is None:
_check_dts_bounds(&obj.dts)
check_dts_bounds(&obj.dts)
return obj
else:
# Keep the converter same as PyDateTime's
Expand Down Expand Up @@ -1810,7 +1812,7 @@ def _test_parse_iso8601(object ts):

_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &obj.dts)
_check_dts_bounds(&obj.dts)
check_dts_bounds(&obj.dts)
if out_local == 1:
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC')
Expand Down Expand Up @@ -1897,18 +1899,6 @@ cpdef inline object _localize_pydatetime(object dt, object tz):
return dt.replace(tzinfo=tz)


class OutOfBoundsDatetime(ValueError):
pass

cdef inline _check_dts_bounds(pandas_datetimestruct *dts):
if check_dts_bounds(dts):
fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month,
dts.day, dts.hour,
dts.min, dts.sec)
raise OutOfBoundsDatetime(
'Out of bounds nanosecond timestamp: %s' % fmt)


def datetime_to_datetime64(ndarray[object] values):
cdef:
Py_ssize_t i, n = len(values)
Expand All @@ -1933,13 +1923,13 @@ def datetime_to_datetime64(ndarray[object] values):

_ts = convert_datetime_to_tsobject(val, None)
iresult[i] = _ts.value
_check_dts_bounds(&_ts.dts)
check_dts_bounds(&_ts.dts)
else:
if inferred_tz is not None:
raise ValueError('Cannot mix tz-aware with '
'tz-naive values')
iresult[i] = _pydatetime_to_dts(val, &dts)
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
else:
raise TypeError('Unrecognized value type: %s' % type(val))

Expand Down Expand Up @@ -2252,7 +2242,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
_ts = convert_datetime_to_tsobject(val, None)
iresult[i] = _ts.value
try:
_check_dts_bounds(&_ts.dts)
check_dts_bounds(&_ts.dts)
except ValueError:
if is_coerce:
iresult[i] = NPY_NAT
Expand All @@ -2267,7 +2257,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
if is_timestamp(val):
iresult[i] += val.nanosecond
try:
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
except ValueError:
if is_coerce:
iresult[i] = NPY_NAT
Expand All @@ -2277,7 +2267,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
elif PyDate_Check(val):
iresult[i] = _date_to_datetime64(val, &dts)
try:
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
seen_datetime = 1
except ValueError:
if is_coerce:
Expand Down Expand Up @@ -2334,7 +2324,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
tz = pytz.FixedOffset(out_tzoffset)
value = tz_convert_single(value, tz, 'UTC')
iresult[i] = value
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
except ValueError:
# if requiring iso8601 strings, skip trying other formats
if require_iso8601:
Expand Down Expand Up @@ -2433,7 +2423,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
oresult[i] = parse_datetime_string(val, dayfirst=dayfirst,
yearfirst=yearfirst)
_pydatetime_to_dts(oresult[i], &dts)
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
except Exception:
if is_raise:
raise
Expand Down Expand Up @@ -3239,7 +3229,7 @@ cdef inline _get_datetime64_nanos(object val):

if unit != PANDAS_FR_ns:
pandas_datetime_to_datetimestruct(ival, unit, &dts)
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
else:
return ival
Expand Down Expand Up @@ -3267,7 +3257,7 @@ def cast_to_nanoseconds(ndarray arr):
if ivalues[i] != NPY_NAT:
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
else:
iresult[i] = NPY_NAT

Expand Down
16 changes: 16 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
# cython: profile=False

from numpy cimport int64_t, int32_t
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you could actually just call this module util i think

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to avoid name overlap with the existing libs/src/util module. Also because in the dev branch I've ported src/util to a pure-cython (no C deps --> much simpler setup.py) tslibs.util. Don't want to get those mixed up.

My first choice is still the original npy_dtime, since np_datetime overlaps with the existing libs/src/datetime/np_datetime files.



cdef extern from "../src/datetime/np_datetime.h":
ctypedef struct pandas_datetimestruct:
int64_t year
int32_t month, day, hour, min, sec, us, ps, as


cdef check_dts_bounds(pandas_datetimestruct *dts)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you need a void here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, saw your note. Please add "declare this as void" to the to-do list (if possible / needed; I'm not sure).


cdef int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

were these nogil before?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The analogous src/datetime functions are, yes.

cdef void dt64_to_dtstruct(int64_t dt64, pandas_datetimestruct* out) nogil
82 changes: 82 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
# cython: profile=False

from numpy cimport int64_t

cdef extern from "numpy/ndarrayobject.h":
ctypedef int64_t npy_timedelta
ctypedef int64_t npy_datetime

cdef extern from "../src/datetime/np_datetime.h":
ctypedef enum PANDAS_DATETIMEUNIT:
PANDAS_FR_Y
PANDAS_FR_M
PANDAS_FR_W
PANDAS_FR_D
PANDAS_FR_B
PANDAS_FR_h
PANDAS_FR_m
PANDAS_FR_s
PANDAS_FR_ms
PANDAS_FR_us
PANDAS_FR_ns
PANDAS_FR_ps
PANDAS_FR_fs
PANDAS_FR_as

int cmp_pandas_datetimestruct(pandas_datetimestruct *a,
pandas_datetimestruct *b)

npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr,
pandas_datetimestruct *d
) nogil

void pandas_datetime_to_datetimestruct(npy_datetime val,
PANDAS_DATETIMEUNIT fr,
pandas_datetimestruct *result) nogil

pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS

# ----------------------------------------------------------------------


class OutOfBoundsDatetime(ValueError):
    """Raised when a datetime lies outside the range that can be
    represented by nanosecond-resolution 64-bit integers."""


cdef inline check_dts_bounds(pandas_datetimestruct *dts):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

void?

"""Raises OutOfBoundsDatetime if the given date is outside the range that
can be represented by nanosecond-resolution 64-bit integers."""
cdef:
bint error = False

if (dts.year <= 1677 and
cmp_pandas_datetimestruct(dts, &_NS_MIN_DTS) == -1):
error = True
elif (dts.year >= 2262 and
cmp_pandas_datetimestruct(dts, &_NS_MAX_DTS) == 1):
error = True

if error:
fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month,
dts.day, dts.hour,
dts.min, dts.sec)
raise OutOfBoundsDatetime(
'Out of bounds nanosecond timestamp: {fmt}'.format(fmt=fmt))


# ----------------------------------------------------------------------
# Conversion

cdef inline int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil:
    """
    Thin wrapper around pandas_datetimestruct_to_datetime that fixes the
    unit to PANDAS_FR_ns, the frequency used by the vast majority of callers.

    Parameters
    ----------
    dts : pandas_datetimestruct*
        Struct to convert.

    Returns
    -------
    int64_t
        Value returned by pandas_datetimestruct_to_datetime for PANDAS_FR_ns.
    """
    cdef int64_t nanos
    nanos = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts)
    return nanos


cdef inline void dt64_to_dtstruct(int64_t dt64,
                                  pandas_datetimestruct* out) nogil:
    """
    Thin wrapper around pandas_datetime_to_datetimestruct that fixes the
    unit to PANDAS_FR_ns, the frequency used by the vast majority of callers.

    Parameters
    ----------
    dt64 : int64_t
        Value to convert, interpreted with unit PANDAS_FR_ns.
    out : pandas_datetimestruct*
        Destination struct, passed through to
        pandas_datetime_to_datetimestruct as its result argument.
    """
    # void function: falling off the end is equivalent to a bare return
    pandas_datetime_to_datetimestruct(dt64, PANDAS_FR_ns, out)
22 changes: 7 additions & 15 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,8 @@ from numpy cimport ndarray, int64_t
from datetime import date as datetime_date
from datetime cimport datetime

# This is src/datetime.pxd
from datetime cimport (
PANDAS_FR_ns,
check_dts_bounds,
pandas_datetimestruct,
pandas_datetimestruct_to_datetime)
from np_datetime cimport (check_dts_bounds,
dtstruct_to_dt64, pandas_datetimestruct)

from util cimport is_string_object, get_nat

Expand Down Expand Up @@ -333,18 +329,14 @@ def array_strptime(ndarray[object] values, object fmt,
dts.us = us
dts.ps = ns * 1000

iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
if check_dts_bounds(&dts):
iresult[i] = dtstruct_to_dt64(&dts)
try:
check_dts_bounds(&dts)
except ValueError:
if is_coerce:
iresult[i] = NPY_NAT
continue
else:
from pandas._libs.tslib import OutOfBoundsDatetime
fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month,
dts.day, dts.hour,
dts.min, dts.sec)
raise OutOfBoundsDatetime(
'Out of bounds nanosecond timestamp: %s' % fmt)
raise

return result

Expand Down
27 changes: 15 additions & 12 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ class CheckSDist(sdist_class):
'pandas/_libs/sparse.pyx',
'pandas/_libs/parsers.pyx',
'pandas/_libs/tslibs/strptime.pyx',
'pandas/_libs/tslibs/np_datetime.pyx',
'pandas/_libs/tslibs/timedeltas.pyx',
'pandas/_libs/tslibs/timezones.pyx',
'pandas/_libs/tslibs/fields.pyx',
Expand Down Expand Up @@ -469,12 +470,11 @@ def pxd(name):
'pandas/_libs/src/parse_helper.h',
'pandas/_libs/src/compat_helper.h']


tseries_depends = ['pandas/_libs/src/datetime/np_datetime.h',
'pandas/_libs/src/datetime/np_datetime_strings.h',
'pandas/_libs/src/datetime.pxd']
npdt_srces = ['pandas/_libs/src/datetime/np_datetime.c',
'pandas/_libs/src/datetime/np_datetime_strings.c']
np_datetime_headers = ['pandas/_libs/src/datetime/np_datetime.h',
'pandas/_libs/src/datetime/np_datetime_strings.h']
np_datetime_sources = ['pandas/_libs/src/datetime/np_datetime.c',
'pandas/_libs/src/datetime/np_datetime_strings.c']
tseries_depends = np_datetime_headers + ['pandas/_libs/src/datetime.pxd']

# some linux distros require it
libraries = ['m'] if not is_platform_windows() else []
Expand All @@ -489,28 +489,31 @@ def pxd(name):
_pxi_dep['hashtable'])},
'_libs.tslibs.strptime': {'pyxfile': '_libs/tslibs/strptime',
'depends': tseries_depends,
'sources': npdt_srces},
'sources': np_datetime_sources},
'_libs.tslibs.offsets': {'pyxfile': '_libs/tslibs/offsets'},
'_libs.tslib': {'pyxfile': '_libs/tslib',
'pxdfiles': ['_libs/src/util', '_libs/lib'],
'depends': tseries_depends,
'sources': npdt_srces},
'sources': np_datetime_sources},
'_libs.tslibs.np_datetime': {'pyxfile': '_libs/tslibs/np_datetime',
'depends': np_datetime_headers,
'sources': np_datetime_sources},
'_libs.tslibs.timedeltas': {'pyxfile': '_libs/tslibs/timedeltas'},
'_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'},
'_libs.tslibs.fields': {'pyxfile': '_libs/tslibs/fields',
'depends': tseries_depends,
'sources': npdt_srces},
'sources': np_datetime_sources},
'_libs.period': {'pyxfile': '_libs/period',
'depends': (tseries_depends +
['pandas/_libs/src/period_helper.h']),
'sources': npdt_srces + [
'sources': np_datetime_sources + [
'pandas/_libs/src/period_helper.c']},
'_libs.tslibs.parsing': {'pyxfile': '_libs/tslibs/parsing',
'pxdfiles': ['_libs/src/util']},
'_libs.tslibs.frequencies': {'pyxfile': '_libs/tslibs/frequencies',
'pxdfiles': ['_libs/src/util']},
'_libs.index': {'pyxfile': '_libs/index',
'sources': npdt_srces,
'sources': np_datetime_sources,
'pxdfiles': ['_libs/src/util', '_libs/hashtable'],
'depends': _pxi_dep['index']},
'_libs.algos': {'pyxfile': '_libs/algos',
Expand Down Expand Up @@ -623,7 +626,7 @@ def pxd(name):
'pandas/_libs/src/ujson/python/JSONtoObj.c',
'pandas/_libs/src/ujson/lib/ultrajsonenc.c',
'pandas/_libs/src/ujson/lib/ultrajsondec.c'] +
npdt_srces),
np_datetime_sources),
include_dirs=(['pandas/_libs/src/ujson/python',
'pandas/_libs/src/ujson/lib',
'pandas/_libs/src/datetime'] +
Expand Down