Skip to content

Implement npy_dtime.pyx #17805

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Oct 29, 2017
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 18 additions & 28 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ from datetime cimport (
npy_datetime,
is_leapyear,
dayofweek,
check_dts_bounds,
PANDAS_FR_ns,
PyDateTime_Check, PyDate_Check,
PyDelta_Check, # PyDelta_Check(x) --> isinstance(x, timedelta)
Expand All @@ -59,6 +58,9 @@ from datetime cimport (
from datetime import timedelta, datetime
from datetime import time as datetime_time

from tslibs.np_datetime cimport check_dts_bounds
from tslibs.np_datetime import OutOfBoundsDatetime

from khash cimport (
khiter_t,
kh_destroy_int64, kh_put_int64,
Expand Down Expand Up @@ -733,7 +735,7 @@ class Timestamp(_Timestamp):
ts = convert_datetime_to_tsobject(ts_input, _tzinfo)
value = ts.value + (dts.ps // 1000)
if value != NPY_NAT:
_check_dts_bounds(&dts)
check_dts_bounds(&dts)

return create_timestamp_from_ts(value, dts, _tzinfo, self.freq)

Expand Down Expand Up @@ -1601,7 +1603,7 @@ cdef convert_to_tsobject(object ts, object tz, object unit,
'Timestamp'.format(ts, type(ts)))

if obj.value != NPY_NAT:
_check_dts_bounds(&obj.dts)
check_dts_bounds(&obj.dts)

if tz is not None:
_localize_tso(obj, tz)
Expand Down Expand Up @@ -1682,7 +1684,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
obj.value += nanos
obj.dts.ps = nanos * 1000

_check_dts_bounds(&obj.dts)
check_dts_bounds(&obj.dts)
return obj


Expand Down Expand Up @@ -1718,12 +1720,12 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit,
_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
obj.value = pandas_datetimestruct_to_datetime(
PANDAS_FR_ns, &obj.dts)
_check_dts_bounds(&obj.dts)
check_dts_bounds(&obj.dts)
if out_local == 1:
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC')
if tz is None:
_check_dts_bounds(&obj.dts)
check_dts_bounds(&obj.dts)
return obj
else:
# Keep the converter same as PyDateTime's
Expand Down Expand Up @@ -1766,7 +1768,7 @@ def _test_parse_iso8601(object ts):

_string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset)
obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &obj.dts)
_check_dts_bounds(&obj.dts)
check_dts_bounds(&obj.dts)
if out_local == 1:
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC')
Expand Down Expand Up @@ -1853,18 +1855,6 @@ cpdef inline object _localize_pydatetime(object dt, object tz):
return dt.replace(tzinfo=tz)


class OutOfBoundsDatetime(ValueError):
pass

cdef inline _check_dts_bounds(pandas_datetimestruct *dts):
if check_dts_bounds(dts):
fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month,
dts.day, dts.hour,
dts.min, dts.sec)
raise OutOfBoundsDatetime(
'Out of bounds nanosecond timestamp: %s' % fmt)


def datetime_to_datetime64(ndarray[object] values):
cdef:
Py_ssize_t i, n = len(values)
Expand All @@ -1889,13 +1879,13 @@ def datetime_to_datetime64(ndarray[object] values):

_ts = convert_datetime_to_tsobject(val, None)
iresult[i] = _ts.value
_check_dts_bounds(&_ts.dts)
check_dts_bounds(&_ts.dts)
else:
if inferred_tz is not None:
raise ValueError('Cannot mix tz-aware with '
'tz-naive values')
iresult[i] = _pydatetime_to_dts(val, &dts)
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
else:
raise TypeError('Unrecognized value type: %s' % type(val))

Expand Down Expand Up @@ -2208,7 +2198,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
_ts = convert_datetime_to_tsobject(val, None)
iresult[i] = _ts.value
try:
_check_dts_bounds(&_ts.dts)
check_dts_bounds(&_ts.dts)
except ValueError:
if is_coerce:
iresult[i] = NPY_NAT
Expand All @@ -2223,7 +2213,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
if is_timestamp(val):
iresult[i] += val.nanosecond
try:
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
except ValueError:
if is_coerce:
iresult[i] = NPY_NAT
Expand All @@ -2233,7 +2223,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
elif PyDate_Check(val):
iresult[i] = _date_to_datetime64(val, &dts)
try:
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
seen_datetime = 1
except ValueError:
if is_coerce:
Expand Down Expand Up @@ -2290,7 +2280,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
tz = pytz.FixedOffset(out_tzoffset)
value = tz_convert_single(value, tz, 'UTC')
iresult[i] = value
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
except ValueError:
# if requiring iso8601 strings, skip trying other formats
if require_iso8601:
Expand Down Expand Up @@ -2389,7 +2379,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
oresult[i] = parse_datetime_string(val, dayfirst=dayfirst,
yearfirst=yearfirst)
_pydatetime_to_dts(oresult[i], &dts)
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
except Exception:
if is_raise:
raise
Expand Down Expand Up @@ -3195,7 +3185,7 @@ cdef inline _get_datetime64_nanos(object val):

if unit != PANDAS_FR_ns:
pandas_datetime_to_datetimestruct(ival, unit, &dts)
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
else:
return ival
Expand Down Expand Up @@ -3223,7 +3213,7 @@ def cast_to_nanoseconds(ndarray arr):
if ivalues[i] != NPY_NAT:
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
_check_dts_bounds(&dts)
check_dts_bounds(&dts)
else:
iresult[i] = NPY_NAT

Expand Down
16 changes: 16 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
# cython: profile=False

from numpy cimport int64_t, int32_t
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you could actually just call this module util i think

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to avoid name overlap with the existing libs/src/util module. Also, in the dev branch I've ported src/util to a pure-cython (no C deps --> much simpler setup.py) tslibs.util, and I don't want to get those mixed up.

My first choice is still the original npy_dtime, since np_datetime overlaps with the existing libs/src/datetime/np_datetime files.



cdef extern from "../src/datetime/np_datetime.h":
ctypedef struct pandas_datetimestruct:
int64_t year
int32_t month, day, hour, min, sec, us, ps, as


cdef void check_dts_bounds(pandas_datetimestruct *dts)

cdef int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

were these nogil before?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The analogous src/datetime functions are, yes.

cdef void dt64_to_dtstruct(int64_t dt64, pandas_datetimestruct* out) nogil
81 changes: 81 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
# cython: profile=False

from numpy cimport int64_t

cdef extern from "numpy/ndarrayobject.h":
ctypedef int64_t npy_timedelta
ctypedef int64_t npy_datetime

cdef extern from "../src/datetime/np_datetime.h":
ctypedef enum PANDAS_DATETIMEUNIT:
PANDAS_FR_Y
PANDAS_FR_M
PANDAS_FR_W
PANDAS_FR_D
PANDAS_FR_B
PANDAS_FR_h
PANDAS_FR_m
PANDAS_FR_s
PANDAS_FR_ms
PANDAS_FR_us
PANDAS_FR_ns
PANDAS_FR_ps
PANDAS_FR_fs
PANDAS_FR_as

int cmp_pandas_datetimestruct(pandas_datetimestruct *a,
pandas_datetimestruct *b)

npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr,
pandas_datetimestruct *d
) nogil

void pandas_datetime_to_datetimestruct(npy_datetime val,
PANDAS_DATETIMEUNIT fr,
pandas_datetimestruct *result) nogil

pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS

# ----------------------------------------------------------------------


class OutOfBoundsDatetime(ValueError):
"""Raised when a datetime is out of bounds for a nanosecond timestamp."""
pass


cdef inline void check_dts_bounds(pandas_datetimestruct *dts):
"""Raises OutOfBoundsDatetime if the given datetime struct falls before
_NS_MIN_DTS or after _NS_MAX_DTS; otherwise does nothing."""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a proper doc-string and change this one (which is wrong)? The function does not return anything; it just raises if there is an out-of-bounds date.

cdef:
bint error = False

if (dts.year <= 1677 and
cmp_pandas_datetimestruct(dts, &_NS_MIN_DTS) == -1):
error = True
elif (dts.year >= 2262 and
cmp_pandas_datetimestruct(dts, &_NS_MAX_DTS) == 1):
error = True

if error:
fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month,
dts.day, dts.hour,
dts.min, dts.sec)
raise OutOfBoundsDatetime(
'Out of bounds nanosecond timestamp: %s' % fmt)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you use {} (str.format) for the formatting? I would also pass dts to the OutOfBoundsDatetime constructor and format it there (a bit more idiomatic, I think).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now I'll implement the half of this that I know how to do; I will work on my .format-fu and follow up with the rest.



# ----------------------------------------------------------------------
# Conversion

cdef inline int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil:
"""Convenience function to call pandas_datetimestruct_to_datetime
with the by-far-most-common frequency PANDAS_FR_ns.

Parameters
----------
dts : pandas_datetimestruct*
Pointer to the datetime struct to convert.

Returns
-------
int64_t
The value returned by pandas_datetimestruct_to_datetime when called
with PANDAS_FR_ns and dts.
"""
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts)


cdef inline void dt64_to_dtstruct(int64_t dt64,
pandas_datetimestruct* out) nogil:
"""Convenience function to call pandas_datetime_to_datetimestruct
with the by-far-most-common frequency PANDAS_FR_ns.

Parameters
----------
dt64 : int64_t
Value to convert, passed through with the PANDAS_FR_ns unit.
out : pandas_datetimestruct*
Pointer to the struct handed to pandas_datetime_to_datetimestruct
as its result argument.
"""
pandas_datetime_to_datetimestruct(dt64, PANDAS_FR_ns, out)
# Nothing to return: the result is delivered through ``out``.
return
22 changes: 7 additions & 15 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,8 @@ from numpy cimport ndarray, int64_t
from datetime import date as datetime_date
from datetime cimport datetime

# This is src/datetime.pxd
from datetime cimport (
PANDAS_FR_ns,
check_dts_bounds,
pandas_datetimestruct,
pandas_datetimestruct_to_datetime)
from np_datetime cimport (check_dts_bounds,
dtstruct_to_dt64, pandas_datetimestruct)

from util cimport is_string_object, get_nat

Expand Down Expand Up @@ -333,18 +329,14 @@ def array_strptime(ndarray[object] values, object fmt,
dts.us = us
dts.ps = ns * 1000

iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
if check_dts_bounds(&dts):
iresult[i] = dtstruct_to_dt64(&dts)
try:
check_dts_bounds(&dts)
except ValueError:
if is_coerce:
iresult[i] = NPY_NAT
continue
else:
from pandas._libs.tslib import OutOfBoundsDatetime
fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month,
dts.day, dts.hour,
dts.min, dts.sec)
raise OutOfBoundsDatetime(
'Out of bounds nanosecond timestamp: %s' % fmt)
raise

return result

Expand Down
27 changes: 15 additions & 12 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ class CheckSDist(sdist_class):
'pandas/_libs/sparse.pyx',
'pandas/_libs/parsers.pyx',
'pandas/_libs/tslibs/strptime.pyx',
'pandas/_libs/tslibs/np_datetime.pyx',
'pandas/_libs/tslibs/timedeltas.pyx',
'pandas/_libs/tslibs/timezones.pyx',
'pandas/_libs/tslibs/fields.pyx',
Expand Down Expand Up @@ -468,12 +469,11 @@ def pxd(name):
'pandas/_libs/src/parse_helper.h',
'pandas/_libs/src/compat_helper.h']


tseries_depends = ['pandas/_libs/src/datetime/np_datetime.h',
'pandas/_libs/src/datetime/np_datetime_strings.h',
'pandas/_libs/src/datetime.pxd']
npdt_srces = ['pandas/_libs/src/datetime/np_datetime.c',
'pandas/_libs/src/datetime/np_datetime_strings.c']
np_datetime_headers = ['pandas/_libs/src/datetime/np_datetime.h',
'pandas/_libs/src/datetime/np_datetime_strings.h',]
np_datetime_sources = ['pandas/_libs/src/datetime/np_datetime.c',
'pandas/_libs/src/datetime/np_datetime_strings.c']
tseries_depends = np_datetime_headers + ['pandas/_libs/src/datetime.pxd']

# some linux distros require it
libraries = ['m'] if not is_platform_windows() else []
Expand All @@ -488,27 +488,30 @@ def pxd(name):
_pxi_dep['hashtable'])},
'_libs.tslibs.strptime': {'pyxfile': '_libs/tslibs/strptime',
'depends': tseries_depends,
'sources': npdt_srces},
'sources': np_datetime_sources},
'_libs.tslib': {'pyxfile': '_libs/tslib',
'pxdfiles': ['_libs/src/util', '_libs/lib'],
'depends': tseries_depends,
'sources': npdt_srces},
'sources': np_datetime_sources},
'_libs.tslibs.np_datetime': {'pyxfile': '_libs/tslibs/np_datetime',
'depends': np_datetime_headers,
'sources': np_datetime_sources},
'_libs.tslibs.timedeltas': {'pyxfile': '_libs/tslibs/timedeltas'},
'_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'},
'_libs.tslibs.fields': {'pyxfile': '_libs/tslibs/fields',
'depends': tseries_depends,
'sources': npdt_srces},
'sources': np_datetime_sources},
'_libs.period': {'pyxfile': '_libs/period',
'depends': (tseries_depends +
['pandas/_libs/src/period_helper.h']),
'sources': npdt_srces + [
'sources': np_datetime_sources + [
'pandas/_libs/src/period_helper.c']},
'_libs.tslibs.parsing': {'pyxfile': '_libs/tslibs/parsing',
'pxdfiles': ['_libs/src/util']},
'_libs.tslibs.frequencies': {'pyxfile': '_libs/tslibs/frequencies',
'pxdfiles': ['_libs/src/util']},
'_libs.index': {'pyxfile': '_libs/index',
'sources': npdt_srces,
'sources': np_datetime_sources,
'pxdfiles': ['_libs/src/util', '_libs/hashtable'],
'depends': _pxi_dep['index']},
'_libs.algos': {'pyxfile': '_libs/algos',
Expand Down Expand Up @@ -621,7 +624,7 @@ def pxd(name):
'pandas/_libs/src/ujson/python/JSONtoObj.c',
'pandas/_libs/src/ujson/lib/ultrajsonenc.c',
'pandas/_libs/src/ujson/lib/ultrajsondec.c'] +
npdt_srces),
np_datetime_sources),
include_dirs=(['pandas/_libs/src/ujson/python',
'pandas/_libs/src/ujson/lib',
'pandas/_libs/src/datetime'] +
Expand Down