Skip to content

direct conversions for dt64 units #19487

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions pandas/_libs/src/datetime/np_datetime.c
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,7 @@ NPY_NO_EXPORT void add_seconds_to_datetimestruct(pandas_datetimestruct *dts,
* Fills in the year, month, day in 'dts' based on the days
* offset from 1970.
*/
static void set_datetimestruct_days(npy_int64 days,
pandas_datetimestruct *dts) {
void set_datetimestruct_days(npy_int64 days, pandas_datetimestruct *dts) {
const int *month_lengths;
int i;

Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/src/datetime/np_datetime.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
PANDAS_DATETIMEUNIT fr,
pandas_timedeltastruct *result);

void set_datetimestruct_days(npy_int64 days, pandas_datetimestruct *dts);

int dayofweek(int y, int m, int d);

extern const int days_per_month_table[2][12];
Expand Down
14 changes: 4 additions & 10 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@ PyDateTime_IMPORT

from np_datetime cimport (check_dts_bounds,
pandas_datetimestruct,
pandas_datetime_to_datetimestruct, _string_to_dts,
_string_to_dts,
PANDAS_DATETIMEUNIT, PANDAS_FR_ns,
npy_datetime,
dt64_to_dtstruct, dtstruct_to_dt64,
get_datetime64_unit, get_datetime64_value,
pydatetime_to_dt64)
pydatetime_to_dt64, convert_to_ns)

from util cimport (is_string_object,
is_datetime64_object,
Expand Down Expand Up @@ -60,17 +60,14 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
value to nanoseconds if necessary.
"""
cdef:
pandas_datetimestruct dts
PANDAS_DATETIMEUNIT unit
npy_datetime ival

unit = get_datetime64_unit(val)
ival = get_datetime64_value(val)

if unit != PANDAS_FR_ns:
pandas_datetime_to_datetimestruct(ival, unit, &dts)
check_dts_bounds(&dts)
ival = dtstruct_to_dt64(&dts)
ival = convert_to_ns(ival, unit)

return ival

Expand All @@ -93,7 +90,6 @@ def ensure_datetime64ns(ndarray arr, copy=True):
Py_ssize_t i, n = arr.size
ndarray[int64_t] ivalues, iresult
PANDAS_DATETIMEUNIT unit
pandas_datetimestruct dts

shape = (<object> arr).shape

Expand All @@ -113,9 +109,7 @@ def ensure_datetime64ns(ndarray arr, copy=True):
else:
for i in range(n):
if ivalues[i] != NPY_NAT:
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
iresult[i] = dtstruct_to_dt64(&dts)
check_dts_bounds(&dts)
iresult[i] = convert_to_ns(ivalues[i], unit)
else:
iresult[i] = NPY_NAT

Expand Down
5 changes: 1 addition & 4 deletions pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ cdef extern from "../src/datetime/np_datetime.h":
PANDAS_FR_fs
PANDAS_FR_as

void pandas_datetime_to_datetimestruct(npy_datetime val,
PANDAS_DATETIMEUNIT fr,
pandas_datetimestruct *result) nogil

int days_per_month_table[2][12]
int dayofweek(int y, int m, int d) nogil
int is_leapyear(int64_t year) nogil
Expand All @@ -65,6 +61,7 @@ cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1

cdef check_dts_bounds(pandas_datetimestruct *dts)

cdef int64_t convert_to_ns(int64_t val, PANDAS_DATETIMEUNIT unit) except? -1
cdef int64_t dtstruct_to_dt64(pandas_datetimestruct* dts) nogil
cdef void dt64_to_dtstruct(int64_t dt64, pandas_datetimestruct* out) nogil
cdef void td64_to_tdstruct(int64_t td64, pandas_timedeltastruct* out) nogil
Expand Down
132 changes: 132 additions & 0 deletions pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
# cython: profile=False
cimport cython
from cython cimport Py_ssize_t

from cpython cimport (Py_EQ, Py_NE, Py_GE, Py_GT, Py_LT, Py_LE,
PyUnicode_Check, PyUnicode_AsASCIIString)
Expand All @@ -13,6 +15,7 @@ from cpython.datetime cimport (datetime, date,
PyDateTime_DATE_GET_MICROSECOND)
PyDateTime_IMPORT

import numpy as np
from numpy cimport int64_t

cdef extern from "../src/datetime/np_datetime.h":
Expand All @@ -33,6 +36,9 @@ cdef extern from "../src/datetime/np_datetime.h":
) nogil

pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS
void set_datetimestruct_days(int64_t days,
pandas_datetimestruct *dts) nogil


cdef extern from "../src/datetime/np_datetime_strings.h":
int parse_iso_8601_datetime(char *str, int len,
Expand Down Expand Up @@ -199,3 +205,129 @@ cdef inline int _cstring_to_dts(char *val, int length,
result = parse_iso_8601_datetime(val, length,
dts, out_local, out_tzoffset)
return result


# ----------------------------------------------------------------------
# Unit Conversion
# Reference point for the calendar-based (year / month) conversions below.
cdef datetime EPOCH = datetime(1970, 1, 1)

# Conversion factors between each unit and nanoseconds, indexed by the
# integer value of PANDAS_DATETIMEUNIT (see `_coeffs[<Py_ssize_t>unit]` in
# convert_to_ns).  Units coarser than ns are multiplied by their entry,
# units finer than ns are floor-divided by it.
# NOTE(review): only some of the large literals carry an explicit `L`
# suffix -- confirm the unsuffixed ones are emitted as 64-bit constants.
cdef int64_t* _coeffs = [0, # PANDAS_FR_Y; not read, years use calendar math
0, # PANDAS_FR_M; not read, months use calendar math
7 * 24 * 3600 * 1000 * 1000 * 1000, # PANDAS_FR_W
0, # NPY_FR_B dummy
24 * 3600 * 1000 * 1000 * 1000, # PANDAS_FR_D
3600 * 1000 * 1000 * 1000L, # PANDAS_FR_h
60 * 1000 * 1000 * 1000L, # PANDAS_FR_m
1000 * 1000 * 1000, # PANDAS_FR_s
1000 * 1000L, # PANDAS_FR_ms
1000, # PANDAS_FR_us
1, # PANDAS_FR_ns
# From here down we divide instead of multiply
1000, # PANDAS_FR_ps
1000 * 1000, # PANDAS_FR_fs
1000 * 1000 * 1000] # PANDAS_FR_as

# The largest absolute values these can take _without_ raising.
# Indexed like `_coeffs`.  convert_to_ns compares abs(val) against the entry
# for *every* unit, including Y and M, so those two entries are real limits
# (offsets in years / months from 1970), not placeholders.
cdef int64_t* _bounds = [292, # PANDAS_FR_Y (years from 1970)
3507, # PANDAS_FR_M (months from 1970-01)
15250, # PANDAS_FR_W
0, # NPY_FR_B dummy
106751, # PANDAS_FR_D
2562047, # PANDAS_FR_h
153722867, # PANDAS_FR_m
9223372036, # PANDAS_FR_s
9223372036854, # PANDAS_FR_ms
9223372036854775, # PANDAS_FR_us
9223372036854775807, # PANDAS_FR_ns; int64 max, check can never trigger
9223372036854775807, # PANDAS_FR_ps; division cannot overflow
9223372036854775807, # PANDAS_FR_fs; division cannot overflow
9223372036854775807] # PANDAS_FR_as; division cannot overflow

# Type names for the np.datetime64 types that are liable to overflow;
# used so we can render the correct exception message
# (ns and finer units are intentionally absent: their bound is int64 max).
cdef dict type_names = {PANDAS_FR_Y: 'Y', PANDAS_FR_M: 'M', PANDAS_FR_W: 'W',
PANDAS_FR_D: 'D', PANDAS_FR_h: 'h', PANDAS_FR_m: 'm',
PANDAS_FR_s: 's', PANDAS_FR_ms: 'ms',
PANDAS_FR_us: 'us'}


cdef int64_t convert_to_ns(int64_t val, PANDAS_DATETIMEUNIT unit) except? -1:
    """
    Convert the int64_t representation of a timestamp with the given unit
    to a representation using PANDAS_FR_ns.

    Parameters
    ----------
    val : int64_t
        Offset from the 1970-01-01 epoch, counted in `unit`.
    unit : PANDAS_DATETIMEUNIT
        Resolution of `val`.

    Returns
    -------
    int64_t
        The same instant expressed in nanoseconds since the epoch.

    Raises
    ------
    OutOfBoundsDatetime
        If abs(val) exceeds the `_bounds` entry for `unit`, i.e. the
        result would not fit in a nanosecond-resolution int64.
    """
    cdef:
        datetime dt
        int64_t coeff, bound

    bound = _bounds[<Py_ssize_t>unit]
    if abs(val) > bound:
        unit_name = type_names[unit]
        # Render the offending value at second resolution.  Casting it to
        # datetime64[ns] would overflow silently and report a wrapped-around
        # (i.e. wrong) date in the error message.
        val_s = np.datetime64(val, unit_name).astype('datetime64[s]')
        fmt = str(val_s).replace('T', ' ')
        raise OutOfBoundsDatetime('Out of bounds nanosecond timestamp: '
                                  '{fmt}'.format(fmt=fmt))

    if unit == PANDAS_FR_ns:
        # Already in the target resolution; previously this case fell off
        # the end of the function without returning `val`.
        return val

    if unit == PANDAS_FR_Y or unit == PANDAS_FR_M:
        # Years and months have variable length, so go through the calendar.
        if unit == PANDAS_FR_Y:
            dt = datetime(1970 + val, 1, 1)
        else:
            # divmod always floors, so negative month offsets are handled
            # without a separate branch and independently of the cdivision
            # directive (e.g. -1 -> (-1, 11) -> December 1969).
            years, months = divmod(val, 12)
            dt = datetime(1970 + years, months + 1, 1)

        # `dt` is always midnight on the first of a month, so the offset from
        # the epoch is a whole number of days; stay in integer arithmetic
        # instead of round-tripping through a float number of seconds.
        return (dt - EPOCH).days * _coeffs[<Py_ssize_t>PANDAS_FR_D]

    coeff = _coeffs[<Py_ssize_t>unit]
    if unit < PANDAS_FR_ns:
        # Coarser than ns: the bounds check above guarantees no overflow.
        return val * coeff

    # Finer than ns (ps, fs, as): no risk of overflows
    return val // coeff


@cython.cdivision(True)
cdef int convert_datetime_to_dtstruct(int64_t dt, pandas_datetimestruct *out):
    """
    Break a nanosecond-resolution (PANDAS_FR_ns) int64_t timestamp into the
    fields of a pandas_datetimestruct.

    Only year/month/day (via set_datetimestruct_days) and hour, min, sec,
    us and ps are written; any other field of `out` is left untouched.

    Parameters
    ----------
    dt : int64_t
        Nanoseconds relative to 1970-01-01.
    out : pandas_datetimestruct*
        Struct that receives the broken-down fields.

    Returns
    -------
    code : 0 on success
    """
    cdef:
        int64_t ns_per_sec = 1000000000LL
        int64_t ns_per_day = 24LL * 60LL * 60LL * 1000LL * 1000LL * 1000LL
        int64_t n_days, ns_of_day

    # With cdivision enabled, / and % truncate toward zero, so a negative
    # timestamp that is not an exact multiple of a day yields a negative
    # remainder.  Shift such values to the previous day so the time-of-day
    # part is always in [0, ns_per_day).
    n_days = dt / ns_per_day
    ns_of_day = dt % ns_per_day
    if ns_of_day < 0:
        n_days -= 1
        ns_of_day += ns_per_day

    set_datetimestruct_days(n_days, out)

    out.hour = ns_of_day / (60 * 60 * ns_per_sec)
    out.min = (ns_of_day / (60 * ns_per_sec)) % 60
    out.sec = (ns_of_day / ns_per_sec) % 60
    out.us = (ns_of_day / 1000LL) % 1000000LL
    # Sub-microsecond remainder is stored in picoseconds.
    out.ps = (ns_of_day % 1000LL) * 1000

    return 0