Skip to content

Implement numpy_helper functions directly in cython #18059

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 6, 2017
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,8 @@ from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
PyTime_Check, PyDelta_Check,
PyDateTime_IMPORT)
PyDateTime_IMPORT
# this is our tseries.pxd
from datetime cimport get_timedelta64_value, get_datetime64_value

from tslibs.np_datetime cimport get_timedelta64_value, get_datetime64_value
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this now makes lib depend on np_datetime.pxd, which is OK because tseries_depends now lists it.


from tslib cimport _check_all_nulls
import tslib
Expand Down
63 changes: 6 additions & 57 deletions pandas/_libs/src/datetime.pxd
Original file line number Diff line number Diff line change
@@ -1,52 +1,20 @@
# cython: profile=False
from numpy cimport int64_t, int32_t, npy_int64, npy_int32, ndarray
from cpython cimport PyObject
from numpy cimport int64_t, npy_int64, npy_int32

from cpython cimport PyUnicode_Check, PyUnicode_AsASCIIString


cdef extern from "datetime.h":

ctypedef class datetime.date [object PyDateTime_Date]:
pass

ctypedef class datetime.datetime [object PyDateTime_DateTime]:
pass

ctypedef class datetime.timedelta [object PyDateTime_Delta]:
pass

void PyDateTime_IMPORT()

int PyDateTime_GET_YEAR(date)
int PyDateTime_GET_MONTH(date)
int PyDateTime_GET_DAY(date)
int PyDateTime_DATE_GET_HOUR(object o)
int PyDateTime_DATE_GET_MINUTE(object o)
int PyDateTime_DATE_GET_SECOND(object o)
int PyDateTime_DATE_GET_MICROSECOND(object o)
int PyDateTime_TIME_GET_HOUR(object o)
int PyDateTime_TIME_GET_MINUTE(object o)
int PyDateTime_TIME_GET_SECOND(object o)
int PyDateTime_TIME_GET_MICROSECOND(object o)
bint PyDateTime_Check(object o)
bint PyDate_Check(object o)
bint PyTime_Check(object o)
bint PyDelta_Check(object o)
object PyDateTime_FromDateAndTime(int year, int month, int day, int hour,
int minute, int second, int us)

cdef extern from "numpy/ndarrayobject.h":

ctypedef int64_t npy_timedelta
ctypedef int64_t npy_datetime

ctypedef enum NPY_CASTING:
NPY_NO_CASTING
NPY_EQUIV_CASTING
NPY_SAFE_CASTING
NPY_SAME_KIND_CASTING
NPY_UNSAFE_CASTING
NPY_NO_CASTING
NPY_EQUIV_CASTING
NPY_SAFE_CASTING
NPY_SAME_KIND_CASTING
NPY_UNSAFE_CASTING


cdef extern from "numpy_helper.h":
Expand Down Expand Up @@ -79,9 +47,6 @@ cdef extern from "datetime/np_datetime.h":
npy_int64 year
npy_int32 month, day, hour, min, sec, us, ps, as

int cmp_pandas_datetimestruct(pandas_datetimestruct *a,
pandas_datetimestruct *b)

npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr,
pandas_datetimestruct *d) nogil
void pandas_datetime_to_datetimestruct(npy_datetime val,
Expand All @@ -102,8 +67,6 @@ cdef extern from "datetime/np_datetime_strings.h":
PANDAS_DATETIMEUNIT *out_bestunit,
npy_bool *out_special)

# int parse_python_string(object obj, pandas_datetimestruct *out) except -1




Expand Down Expand Up @@ -134,17 +97,3 @@ cdef inline int _cstring_to_dts(char *val, int length,
NPY_UNSAFE_CASTING,
dts, out_local, out_tzoffset, &out_bestunit, &special)
return result


cdef inline bint check_dts_bounds(pandas_datetimestruct *dts):
    """
    Report whether ``dts`` lies outside [_NS_MIN_DTS, _NS_MAX_DTS].

    Returns True if an error needs to be raised, False otherwise.  In each
    test the year comparison short-circuits, so the full struct comparison
    is only made for years at or beyond the hard-coded boundary years.
    """
    # Lower bound: strictly less than _NS_MIN_DTS.
    if (dts.year <= 1677 and
            cmp_pandas_datetimestruct(dts, &_NS_MIN_DTS) == -1):
        return True

    # Upper bound: strictly greater than _NS_MAX_DTS.
    if (dts.year >= 2262 and
            cmp_pandas_datetimestruct(dts, &_NS_MAX_DTS) == 1):
        return True

    return False
9 changes: 4 additions & 5 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,8 @@ PyDateTime_IMPORT
from datetime cimport (
pandas_datetime_to_datetimestruct,
days_per_month_table,
get_datetime64_value,
get_timedelta64_value,
get_datetime64_unit,
PANDAS_DATETIMEUNIT,
_string_to_dts,
npy_datetime,
is_leapyear,
dayofweek,
PANDAS_FR_ns)
Expand All @@ -57,7 +53,10 @@ from datetime import time as datetime_time
from tslibs.np_datetime cimport (check_dts_bounds,
pandas_datetimestruct,
dt64_to_dtstruct, dtstruct_to_dt64,
pydatetime_to_dt64, pydate_to_dt64)
pydatetime_to_dt64, pydate_to_dt64,
npy_datetime,
get_datetime64_unit, get_datetime64_value,
get_timedelta64_value)
from tslibs.np_datetime import OutOfBoundsDatetime

from khash cimport (
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from numpy cimport int64_t

from datetime cimport pandas_datetimestruct
from np_datetime cimport pandas_datetimestruct


cdef class _TSObject:
Expand Down
59 changes: 59 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,67 @@ from cpython.datetime cimport date, datetime

from numpy cimport int64_t, int32_t

cdef extern from "numpy/ndarrayobject.h":
    # numpy's datetime/timedelta value types, declared to Cython as int64_t.
    ctypedef int64_t npy_timedelta
    ctypedef int64_t npy_datetime

cdef extern from "numpy/ndarraytypes.h":
# ctypedef struct npy_datetimestruct:
# int64_t year
# int32_t month, day, hour, min, sec, us, ps, as

ctypedef enum NPY_DATETIMEUNIT:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this enum needed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it's leftover from an earlier revision. Will remove.

NPY_FR_Y = 0 # Years
NPY_FR_M = 1 # Months
NPY_FR_W = 2 # Weeks
# Gap where 1.6 NPY_FR_B (value 3) was
NPY_FR_D = 4 # Days
NPY_FR_h = 5 # hours
NPY_FR_m = 6 # minutes
NPY_FR_s = 7 # seconds
NPY_FR_ms = 8 # milliseconds
NPY_FR_us = 9 # microseconds
NPY_FR_ns = 10 # nanoseconds
NPY_FR_ps = 11 # picoseconds
NPY_FR_fs = 12 # femtoseconds
NPY_FR_as = 13 # attoseconds
NPY_FR_GENERIC = 14 # Generic, unbound units, can convert to anything

ctypedef struct PyArray_DatetimeMetaData:
PANDAS_DATETIMEUNIT base
int64_t num

cdef extern from "numpy/arrayscalars.h":
    # Struct declarations for numpy's datetime64 and timedelta64 scalar
    # objects.  The PyObject_HEAD member is noted but not declared here.
    ctypedef struct PyDatetimeScalarObject:
        # PyObject_HEAD
        npy_datetime obval  # stored value (npy_datetime is int64_t)
        PyArray_DatetimeMetaData obmeta  # unit metadata (base, num)

    ctypedef struct PyTimedeltaScalarObject:
        # PyObject_HEAD
        npy_timedelta obval  # stored value (npy_timedelta is int64_t)
        PyArray_DatetimeMetaData obmeta  # unit metadata (base, num)

cdef extern from "../src/datetime/np_datetime.h":
    # Broken-down datetime fields as declared in pandas' np_datetime.h.
    ctypedef struct pandas_datetimestruct:
        int64_t year
        int32_t month, day, hour, min, sec, us, ps, as

    # Datetime unit codes.  This is an extern enum, so the numeric values
    # are taken from the C header rather than from the order listed here.
    ctypedef enum PANDAS_DATETIMEUNIT:
        PANDAS_FR_Y
        PANDAS_FR_M
        PANDAS_FR_W
        PANDAS_FR_D
        PANDAS_FR_B
        PANDAS_FR_h
        PANDAS_FR_m
        PANDAS_FR_s
        PANDAS_FR_ms
        PANDAS_FR_us
        PANDAS_FR_ns
        PANDAS_FR_ps
        PANDAS_FR_fs
        PANDAS_FR_as

cdef check_dts_bounds(pandas_datetimestruct *dts)

Expand All @@ -19,3 +74,7 @@ cdef void dt64_to_dtstruct(int64_t dt64, pandas_datetimestruct* out) nogil

cdef int64_t pydatetime_to_dt64(datetime val, pandas_datetimestruct *dts)
cdef int64_t pydate_to_dt64(date val, pandas_datetimestruct *dts)

cdef npy_datetime get_datetime64_value(object obj) nogil
cdef npy_timedelta get_timedelta64_value(object obj) nogil
cdef PANDAS_DATETIMEUNIT get_datetime64_unit(object obj) nogil
48 changes: 28 additions & 20 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,7 @@ PyDateTime_IMPORT

from numpy cimport int64_t

cdef extern from "numpy/ndarrayobject.h":
ctypedef int64_t npy_timedelta
ctypedef int64_t npy_datetime

cdef extern from "../src/datetime/np_datetime.h":
ctypedef enum PANDAS_DATETIMEUNIT:
PANDAS_FR_Y
PANDAS_FR_M
PANDAS_FR_W
PANDAS_FR_D
PANDAS_FR_B
PANDAS_FR_h
PANDAS_FR_m
PANDAS_FR_s
PANDAS_FR_ms
PANDAS_FR_us
PANDAS_FR_ns
PANDAS_FR_ps
PANDAS_FR_fs
PANDAS_FR_as

int cmp_pandas_datetimestruct(pandas_datetimestruct *a,
pandas_datetimestruct *b)

Expand All @@ -47,6 +27,34 @@ cdef extern from "../src/datetime/np_datetime.h":
pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS

# ----------------------------------------------------------------------
# numpy object inspection

cdef inline npy_datetime get_datetime64_value(object obj) nogil:
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you make the docstrings more descriptive? Something like "returns int64 value underlying numpy datetime scalar Python object"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, will change.

Adapted from numpy_helper.h version:

PANDAS_INLINE npy_datetime get_datetime64_value(PyObject* obj) {
return ((PyDatetimeScalarObject*)obj)->obval;
}
"""
return (<PyDatetimeScalarObject*>obj).obval


cdef inline npy_timedelta get_timedelta64_value(object obj) nogil:
    """
    Return the int64 value underlying a numpy timedelta64 scalar object.

    ``obj`` is cast to ``PyTimedeltaScalarObject*`` and its ``obval`` field
    is read directly.  No type check is performed, so the caller must only
    pass timedelta64 scalars.

    Adapted from numpy_helper.h version:

    PANDAS_INLINE npy_timedelta get_timedelta64_value(PyObject* obj) {
        return ((PyTimedeltaScalarObject*)obj)->obval;
    }
    """
    return (<PyTimedeltaScalarObject*>obj).obval


cdef inline PANDAS_DATETIMEUNIT get_datetime64_unit(object obj) nogil:
    """
    Return the unit stored in a numpy datetime64 scalar object.

    ``obj`` is cast to ``PyDatetimeScalarObject*`` and its ``obmeta.base``
    field is returned as a PANDAS_DATETIMEUNIT.  No type check is performed,
    so the caller must only pass datetime64 scalars.
    """
    return <PANDAS_DATETIMEUNIT>(<PyDatetimeScalarObject*>obj).obmeta.base

# ----------------------------------------------------------------------


class OutOfBoundsDatetime(ValueError):
Expand Down