forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathnp_datetime.pyx
372 lines (300 loc) · 11.2 KB
/
np_datetime.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
from cpython.datetime cimport (
PyDateTime_DATE_GET_HOUR,
PyDateTime_DATE_GET_MICROSECOND,
PyDateTime_DATE_GET_MINUTE,
PyDateTime_DATE_GET_SECOND,
PyDateTime_GET_DAY,
PyDateTime_GET_MONTH,
PyDateTime_GET_YEAR,
import_datetime,
)
from cpython.object cimport (
Py_EQ,
Py_GE,
Py_GT,
Py_LE,
Py_LT,
Py_NE,
)
import_datetime()
import numpy as np
cimport numpy as cnp
cnp.import_array()
from numpy cimport (
int64_t,
ndarray,
)
from pandas._libs.tslibs.util cimport get_c_string_buf_and_size
cdef extern from "src/datetime/np_datetime.h":
int cmp_npy_datetimestruct(npy_datetimestruct *a,
npy_datetimestruct *b)
# AS, FS, PS versions exist but are not imported because they are not used.
npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS
npy_datetimestruct _US_MIN_DTS, _US_MAX_DTS
npy_datetimestruct _MS_MIN_DTS, _MS_MAX_DTS
npy_datetimestruct _S_MIN_DTS, _S_MAX_DTS
npy_datetimestruct _M_MIN_DTS, _M_MAX_DTS
PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype);
cdef extern from "src/datetime/np_datetime_strings.h":
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
npy_datetimestruct *out,
NPY_DATETIMEUNIT *out_bestunit,
int *out_local, int *out_tzoffset)
# ----------------------------------------------------------------------
# numpy object inspection
cdef inline npy_datetime get_datetime64_value(object obj) nogil:
    """
    Return the raw integer payload stored in a numpy datetime64 scalar.

    The integer is only meaningful together with the scalar's unit, which
    can be retrieved with `get_datetime64_unit`.
    """
    cdef:
        PyDatetimeScalarObject* scalar = <PyDatetimeScalarObject*>obj

    return scalar.obval
cdef inline npy_timedelta get_timedelta64_value(object obj) nogil:
    """
    Return the raw integer payload stored in a numpy timedelta64 scalar.
    """
    cdef:
        PyTimedeltaScalarObject* scalar = <PyTimedeltaScalarObject*>obj

    return scalar.obval
cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil:
    """
    Return the unit recorded in the metadata of a numpy datetime64 scalar.
    """
    cdef:
        PyDatetimeScalarObject* scalar = <PyDatetimeScalarObject*>obj

    return <NPY_DATETIMEUNIT>scalar.obmeta.base
cdef NPY_DATETIMEUNIT get_unit_from_dtype(cnp.dtype dtype):
    # Look up the datetime metadata attached to `dtype` and return its unit.
    #
    # NB: the caller must guarantee that `dtype` is *some* datetime64 or
    #  timedelta64 dtype; anything else can segfault here.
    cdef:
        PyArray_DatetimeMetaData meta

    meta = get_datetime_metadata_from_dtype(<cnp.PyArray_Descr*>dtype)
    return meta.base
def py_get_unit_from_dtype(dtype):
    # Python-visible wrapper so that tests can exercise get_unit_from_dtype.
    # Cost of exposing it: 896 bytes added to the .so file.
    return get_unit_from_dtype(dtype)
def is_unitless(dtype: cnp.dtype) -> bool:
    """
    Report whether a datetime64 or timedelta64 dtype carries no unit.

    Raises
    ------
    ValueError
        If ``dtype`` is neither a datetime64 nor a timedelta64 dtype.
    """
    if (
        dtype.type_num != cnp.NPY_DATETIME
        and dtype.type_num != cnp.NPY_TIMEDELTA
    ):
        raise ValueError("is_unitless dtype must be datetime64 or timedelta64")

    # A dtype without a unit reports the generic unit.
    return get_unit_from_dtype(dtype) == NPY_DATETIMEUNIT.NPY_FR_GENERIC
# ----------------------------------------------------------------------
# Comparison
cdef bint cmp_dtstructs(
    npy_datetimestruct* left, npy_datetimestruct* right, int op
):
    # Evaluate the rich-comparison `op` (one of the Py_EQ/Py_NE/... constants)
    # between two datetimestructs, based on cmp_npy_datetimestruct's result.
    cdef:
        int cmp_res = cmp_npy_datetimestruct(left, right)

    if op == Py_EQ:
        return cmp_res == 0
    elif op == Py_NE:
        return cmp_res != 0
    elif op == Py_GT:
        return cmp_res == 1
    elif op == Py_LT:
        return cmp_res == -1
    elif op == Py_GE:
        return cmp_res == 0 or cmp_res == 1
    else:
        # Every remaining op is treated as Py_LE
        return cmp_res == 0 or cmp_res == -1
cdef inline bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1:
    """
    Evaluate the rich-comparison ``op`` on two int64_t values.

    This is a more performant, int64_t-typed stand-in for
    PyObject_RichCompare.  ``op`` is expected to be one of the
    Py_EQ / Py_NE / Py_LT / Py_LE / Py_GT / Py_GE constants.
    """
    if op == Py_LT:
        return lhs < rhs
    if op == Py_LE:
        return lhs <= rhs
    if op == Py_GT:
        return lhs > rhs
    if op == Py_GE:
        return lhs >= rhs
    if op == Py_EQ:
        return lhs == rhs
    if op == Py_NE:
        return lhs != rhs
class OutOfBoundsDatetime(ValueError):
    """
    Raised when a datetime falls outside the range that can be represented.
    """
class OutOfBoundsTimedelta(ValueError):
    """
    Raised when a timedelta value is encountered that cannot be represented
    as a timedelta64[ns].
    """
    # This is the timedelta counterpart of OutOfBoundsDatetime.
cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=NPY_FR_ns):
    """
    Raise OutOfBoundsDatetime if ``dts`` lies outside the range covered by
    the minimum/maximum datetimestructs defined for resolution ``unit``.

    Parameters
    ----------
    dts : npy_datetimestruct*
        The broken-down datetime to validate.
    unit : NPY_DATETIMEUNIT, default NPY_FR_ns
        Resolution whose bounds are checked.  Supported values are
        NPY_FR_ns, NPY_FR_us, NPY_FR_ms, NPY_FR_s and NPY_FR_m.

    Raises
    ------
    NotImplementedError
        If ``unit`` is not one of the supported resolutions.
    OutOfBoundsDatetime
        If ``dts`` compares below the lower bound or above the upper bound.
    """
    cdef:
        npy_datetimestruct cmp_upper, cmp_lower
        str unit_name

    # Select the bounds and, alongside them, the resolution name used in the
    # error message so that the message always matches the checked unit.
    if unit == NPY_FR_ns:
        cmp_upper = _NS_MAX_DTS
        cmp_lower = _NS_MIN_DTS
        unit_name = "nanosecond"
    elif unit == NPY_FR_us:
        cmp_upper = _US_MAX_DTS
        cmp_lower = _US_MIN_DTS
        unit_name = "microsecond"
    elif unit == NPY_FR_ms:
        cmp_upper = _MS_MAX_DTS
        cmp_lower = _MS_MIN_DTS
        unit_name = "millisecond"
    elif unit == NPY_FR_s:
        cmp_upper = _S_MAX_DTS
        cmp_lower = _S_MIN_DTS
        unit_name = "second"
    elif unit == NPY_FR_m:
        cmp_upper = _M_MAX_DTS
        cmp_lower = _M_MIN_DTS
        unit_name = "minute"
    else:
        raise NotImplementedError(unit)

    if (
        cmp_npy_datetimestruct(dts, &cmp_lower) == -1
        or cmp_npy_datetimestruct(dts, &cmp_upper) == 1
    ):
        fmt = (f'{dts.year}-{dts.month:02d}-{dts.day:02d} '
               f'{dts.hour:02d}:{dts.min:02d}:{dts.sec:02d}')
        # For NPY_FR_ns this reproduces the historical message exactly.
        raise OutOfBoundsDatetime(
            f'Out of bounds {unit_name} timestamp: {fmt}'
        )
# ----------------------------------------------------------------------
# Conversion
cdef inline int64_t dtstruct_to_dt64(npy_datetimestruct* dts) nogil:
    """
    Shorthand for npy_datetimestruct_to_datetime using NPY_FR_ns, which is
    by far the most common frequency.
    """
    cdef:
        int64_t result = npy_datetimestruct_to_datetime(NPY_FR_ns, dts)

    return result
cdef inline void dt64_to_dtstruct(
    int64_t dt64, npy_datetimestruct* out
) nogil:
    """
    Shorthand for pandas_datetime_to_datetimestruct using NPY_FR_ns, which
    is by far the most common frequency.  The result is written to ``out``.
    """
    pandas_datetime_to_datetimestruct(dt64, NPY_FR_ns, out)
# Currently exposed to Python only so that tests can reach it.
def py_td64_to_tdstruct(int64_t td64, NPY_DATETIMEUNIT unit):
    cdef:
        pandas_timedeltastruct tdstruct

    pandas_timedelta_to_timedeltastruct(td64, unit, &tdstruct)
    # The struct is handed back to Python as a dict.
    return tdstruct
cdef inline int64_t pydatetime_to_dt64(
    datetime val, npy_datetimestruct *dts
):
    """
    Populate ``dts`` from the fields of ``val`` and return the result of
    dtstruct_to_dt64 on it.

    Note that ``val`` is assumed to be timezone-naive.
    """
    # Date part
    dts.year = PyDateTime_GET_YEAR(val)
    dts.month = PyDateTime_GET_MONTH(val)
    dts.day = PyDateTime_GET_DAY(val)

    # Time part, down to microseconds
    dts.hour = PyDateTime_DATE_GET_HOUR(val)
    dts.min = PyDateTime_DATE_GET_MINUTE(val)
    dts.sec = PyDateTime_DATE_GET_SECOND(val)
    dts.us = PyDateTime_DATE_GET_MICROSECOND(val)

    # Nothing is read from `val` for the remaining fields
    dts.as = 0
    dts.ps = 0

    return dtstruct_to_dt64(dts)
cdef inline void pydate_to_dtstruct(date val, npy_datetimestruct *dts):
    # Copy year/month/day from `val` into `dts`; every time-of-day field
    # (hour down to attoseconds) is set to zero.
    dts.year = PyDateTime_GET_YEAR(val)
    dts.month = PyDateTime_GET_MONTH(val)
    dts.day = PyDateTime_GET_DAY(val)

    dts.hour = 0
    dts.min = 0
    dts.sec = 0
    dts.us = 0
    dts.ps = 0
    dts.as = 0
cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts):
    # Fill `dts` from the date (time-of-day zeroed), then convert it with
    # dtstruct_to_dt64.
    cdef:
        int64_t result

    pydate_to_dtstruct(val, dts)
    result = dtstruct_to_dt64(dts)
    return result
cdef inline int string_to_dts(
    str val,
    npy_datetimestruct* dts,
    NPY_DATETIMEUNIT* out_bestunit,
    int* out_local,
    int* out_tzoffset,
    bint want_exc,
) except? -1:
    # Hand the C buffer behind `val` to parse_iso_8601_datetime and return
    # that function's int result; `dts` and the out_* pointers are passed
    # straight through to it.
    cdef:
        Py_ssize_t nchars
        const char* cstr

    cstr = get_c_string_buf_and_size(val, &nchars)
    return parse_iso_8601_datetime(
        cstr, nchars, want_exc, dts, out_bestunit, out_local, out_tzoffset
    )
cpdef ndarray astype_overflowsafe(
    ndarray values,
    cnp.dtype dtype,
    bint copy=True,
):
    """
    Cast datetime64[X] values to datetime64[Y], or timedelta64[X] values to
    timedelta64[Y], raising instead of overflowing.

    Parameters
    ----------
    values : ndarray
        datetime64 or timedelta64 array whose dtype has a unit.
    dtype : np.dtype
        Target dtype; must be of the same kind as ``values.dtype`` and have
        a unit.
    copy : bool, default True
        Only consulted when the source and target units are equal: if True
        a copy is returned, otherwise the (possibly byte-swapped) array is
        returned as-is.

    Returns
    -------
    ndarray

    Raises
    ------
    TypeError
        If ``values.dtype`` and ``dtype`` are not both datetime64 or both
        timedelta64.
    ValueError
        If either dtype has no unit.
    OutOfBoundsDatetime
        If a datetime64 value does not fit in the target unit.
    OutOfBoundsTimedelta
        If a timedelta64 value does not fit in the target unit.
    """
    if not (
        # i.e. dtype.kind == "M" on both sides
        values.descr.type_num == dtype.type_num == cnp.NPY_DATETIME
        # i.e. dtype.kind == "m" on both sides
        or values.descr.type_num == dtype.type_num == cnp.NPY_TIMEDELTA
    ):
        raise TypeError(
            "astype_overflowsafe values.dtype and dtype must be either "
            "both-datetime64 or both-timedelta64."
        )

    cdef:
        NPY_DATETIMEUNIT from_unit = get_unit_from_dtype(values.dtype)
        NPY_DATETIMEUNIT to_unit = get_unit_from_dtype(dtype)

    if (
        from_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
        or to_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
    ):
        # Raise explicitly; otherwise we end up with a SystemError
        #  built-in function [...] returned a result with an error
        raise ValueError(
            "datetime64/timedelta64 values and dtype must have a unit specified"
        )

    if (<object>values).dtype.byteorder == ">":
        # GH#29684 without swapping we incorrectly get OutOfBoundsDatetime
        values = values.astype(values.dtype.newbyteorder("<"))

    if from_unit == to_unit:
        # Checked before allocating a result for perf; may save some memory
        if not copy:
            return values
        return values.copy()

    if from_unit > to_unit:
        # e.g. ns -> us: no risk of overflow, so numpy's astype is safe to
        #  use.  Note there _is_ risk of truncation.
        return values.astype(dtype)

    cdef:
        ndarray src_i8 = values.view("i8")

        # equiv: out_i8 = np.empty((<object>values).shape, dtype="i8")
        ndarray out_i8 = cnp.PyArray_EMPTY(
            values.ndim, values.shape, cnp.NPY_INT64, 0
        )
        # Operand 0 is the output, operand 1 is the input
        cnp.broadcast multi_iter = cnp.PyArray_MultiIterNew2(out_i8, src_i8)
        Py_ssize_t idx, n_items = values.size
        int64_t src_val, dst_val
        npy_datetimestruct dts
        bint is_td = dtype.type_num == cnp.NPY_TIMEDELTA

    for idx in range(n_items):
        # Analogous to: item = values[idx]
        src_val = (<int64_t*>cnp.PyArray_MultiIter_DATA(multi_iter, 1))[0]

        if src_val != NPY_DATETIME_NAT:
            pandas_datetime_to_datetimestruct(src_val, from_unit, &dts)

            try:
                check_dts_bounds(&dts, to_unit)
            except OutOfBoundsDatetime as err:
                if not is_td:
                    raise
                # Timedelta input: re-raise as the timedelta-specific error
                tdval = np.timedelta64(src_val).view(values.dtype)
                msg = (
                    "Cannot convert {tdval} to {dtype} without overflow"
                    .format(tdval=str(tdval), dtype=str(dtype))
                )
                raise OutOfBoundsTimedelta(msg) from err

            dst_val = npy_datetimestruct_to_datetime(to_unit, &dts)
        else:
            # NaT is carried over unchanged
            dst_val = NPY_DATETIME_NAT

        # Analogous to: out_i8[idx] = dst_val
        (<int64_t*>cnp.PyArray_MultiIter_DATA(multi_iter, 0))[0] = dst_val

        cnp.PyArray_MultiIter_NEXT(multi_iter)

    return out_i8.view(dtype)