Skip to content

Implement npy_dtime.pyx #17805

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Oct 29, 2017
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 3 additions & 13 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ from datetime cimport (
npy_datetime,
is_leapyear,
dayofweek,
check_dts_bounds,
PANDAS_FR_ns,
PyDateTime_Check, PyDate_Check,
PyDelta_Check, # PyDelta_Check(x) --> isinstance(x, timedelta)
Expand All @@ -59,6 +58,9 @@ from datetime cimport (
from datetime import timedelta, datetime
from datetime import time as datetime_time

from tslibs.np_datetime cimport check_dts_bounds as _check_dts_bounds
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there a reason for the rename?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... because you told me to change it

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the de-privatize is what I am referring. I don't recall asking for that and its not necessary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh. De-privatized because it isn't actually private when is this module. But I'll revert it anyway.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

de-privatize here (you did elsewhere)

from tslibs.np_datetime import OutOfBoundsDatetime

from khash cimport (
khiter_t,
kh_destroy_int64, kh_put_int64,
Expand Down Expand Up @@ -1853,18 +1855,6 @@ cpdef inline object _localize_pydatetime(object dt, object tz):
return dt.replace(tzinfo=tz)


class OutOfBoundsDatetime(ValueError):
pass

cdef inline _check_dts_bounds(pandas_datetimestruct *dts):
if check_dts_bounds(dts):
fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month,
dts.day, dts.hour,
dts.min, dts.sec)
raise OutOfBoundsDatetime(
'Out of bounds nanosecond timestamp: %s' % fmt)


def datetime_to_datetime64(ndarray[object] values):
cdef:
Py_ssize_t i, n = len(values)
Expand Down
16 changes: 16 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
# cython: profile=False

from numpy cimport int64_t, int32_t
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you could actually just call this module util i think

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to avoid name overlap with the existing libs/src/utilmodule. Also because in the dev branch I've ported src/util to a pure-cython (no C deps--> much simpler setup.py) tslibs.util. Don't want to get those mixed up.

My first choice is still the original npy_dtime, since np_datetime overlaps with the existing libs/src/datetime/np_datetime files.



cdef extern from "../src/datetime/np_datetime.h":
ctypedef struct pandas_datetimestruct:
int64_t year
int32_t month, day, hour, min, sec, us, ps, as


cdef check_dts_bounds(pandas_datetimestruct *dts)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you need a void here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, saw your note, pls add to the list to add this as void (if possible / needed), not sure.


cdef int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where these nogil before?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The analogous src/datetime functions are, yes.

cdef void dt64_to_dtstruct(int64_t dt64, pandas_datetimestruct* out) nogil
81 changes: 81 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
# cython: profile=False

from numpy cimport int64_t

cdef extern from "numpy/ndarrayobject.h":
ctypedef int64_t npy_timedelta
ctypedef int64_t npy_datetime

cdef extern from "../src/datetime/np_datetime.h":
ctypedef enum PANDAS_DATETIMEUNIT:
PANDAS_FR_Y
PANDAS_FR_M
PANDAS_FR_W
PANDAS_FR_D
PANDAS_FR_B
PANDAS_FR_h
PANDAS_FR_m
PANDAS_FR_s
PANDAS_FR_ms
PANDAS_FR_us
PANDAS_FR_ns
PANDAS_FR_ps
PANDAS_FR_fs
PANDAS_FR_as

int cmp_pandas_datetimestruct(pandas_datetimestruct *a,
pandas_datetimestruct *b)

npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr,
pandas_datetimestruct *d
) nogil

void pandas_datetime_to_datetimestruct(npy_datetime val,
PANDAS_DATETIMEUNIT fr,
pandas_datetimestruct *result) nogil

pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS

# ----------------------------------------------------------------------


class OutOfBoundsDatetime(ValueError):
pass


cdef inline check_dts_bounds(pandas_datetimestruct *dts):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

void?

"""Returns True if an error needs to be raised"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a doc-string, and change this (which is wrong), it will just raise if there is an oob date

cdef:
bint error = False

if (dts.year <= 1677 and
cmp_pandas_datetimestruct(dts, &_NS_MIN_DTS) == -1):
error = True
elif (dts.year >= 2262 and
cmp_pandas_datetimestruct(dts, &_NS_MAX_DTS) == 1):
error = True

if error:
fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month,
dts.day, dts.hour,
dts.min, dts.sec)
raise OutOfBoundsDatetime(
'Out of bounds nanosecond timestamp: %s' % fmt)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you use {} in formatting. I would also pass dts to the OutOfBounds Constructor and format it there (a bit more idiomatic I think).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now I'll implement that half of this that I know how to do; will work on my .format-fu and follow-up with the rest.



# ----------------------------------------------------------------------
# Conversion

cdef inline int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil:
"""Convenience function to call pandas_datetimestruct_to_datetime
with the by-far-most-common frequency PANDAS_FR_ns"""
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts)


cdef inline void dt64_to_dtstruct(int64_t dt64,
pandas_datetimestruct* out) nogil:
"""Convenience function to call pandas_datetime_to_datetimestruct
with the by-far-most-common frequency PANDAS_FR_ns"""
pandas_datetime_to_datetimestruct(dt64, PANDAS_FR_ns, out)
return
21 changes: 7 additions & 14 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,8 @@ from numpy cimport ndarray, int64_t
from datetime import date as datetime_date
from datetime cimport datetime

# This is src/datetime.pxd
from datetime cimport (
PANDAS_FR_ns,
check_dts_bounds,
pandas_datetimestruct,
pandas_datetimestruct_to_datetime)
from np_datetime cimport (check_dts_bounds,
dtstruct_to_dt64, pandas_datetimestruct)

from util cimport is_string_object, get_nat

Expand Down Expand Up @@ -333,18 +329,15 @@ def array_strptime(ndarray[object] values, object fmt,
dts.us = us
dts.ps = ns * 1000

iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
if check_dts_bounds(&dts):
iresult[i] = dtstruct_to_dt64(&dts)
try:
check_dts_bounds(&dts)
except ValueError:
if is_coerce:
iresult[i] = NPY_NAT
continue
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can remove the else (as we have a continue)

from pandas._libs.tslib import OutOfBoundsDatetime
fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month,
dts.day, dts.hour,
dts.min, dts.sec)
raise OutOfBoundsDatetime(
'Out of bounds nanosecond timestamp: %s' % fmt)
raise

return result

Expand Down
4 changes: 4 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ class CheckSDist(sdist_class):
'pandas/_libs/sparse.pyx',
'pandas/_libs/parsers.pyx',
'pandas/_libs/tslibs/strptime.pyx',
'pandas/_libs/tslibs/np_datetime.pyx',
'pandas/_libs/tslibs/timedeltas.pyx',
'pandas/_libs/tslibs/timezones.pyx',
'pandas/_libs/tslibs/fields.pyx',
Expand Down Expand Up @@ -493,6 +494,9 @@ def pxd(name):
'pxdfiles': ['_libs/src/util', '_libs/lib'],
'depends': tseries_depends,
'sources': npdt_srces},
'_libs.tslibs.np_datetime': {'pyxfile': '_libs/tslibs/np_datetime',
'depends': tseries_depends[:2],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather be explict here rather than depend on the ordering (you can also make a ctseries_depends or whatever and use that too).

'sources': npdt_srces},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just spell things out, np_datetime_sources is much more readable (and change all references)

'_libs.tslibs.timedeltas': {'pyxfile': '_libs/tslibs/timedeltas'},
'_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'},
'_libs.tslibs.fields': {'pyxfile': '_libs/tslibs/fields',
Expand Down