forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathc_timestamp.pyx
411 lines (338 loc) · 14.1 KB
/
c_timestamp.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
"""
_Timestamp is a C-defined subclass of datetime.datetime.
It is kept separate from timestamps.pyx to prevent circular cimports.
This allows _Timestamp to be imported in other modules
so that isinstance(obj, _Timestamp) checks can be performed.
_Timestamp is awkward to construct: because we inherit from datetime, which
has very specific construction requirements, object instantiation has to be
done in Python (see the Timestamp class, presumably in timestamps.pyx).
This serves as a C extension type that
shadows the Python class, where we do any heavy lifting.
"""
import warnings
from cpython.object cimport (PyObject_RichCompareBool, PyObject_RichCompare,
Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE)
import numpy as np
cimport numpy as cnp
from numpy cimport int64_t, int8_t, uint8_t, ndarray
cnp.import_array()
from cpython.datetime cimport (datetime,
PyDateTime_Check, PyDelta_Check,
PyDateTime_IMPORT)
PyDateTime_IMPORT
from pandas._libs.tslibs.util cimport (
is_datetime64_object, is_timedelta64_object, is_integer_object,
is_array)
from pandas._libs.tslibs.fields import get_start_end_field, get_date_name_field
from pandas._libs.tslibs.nattype cimport c_NaT as NaT
from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime
from pandas._libs.tslibs.np_datetime cimport (
reverse_ops, cmp_scalar)
from pandas._libs.tslibs.timezones cimport (
get_timezone, is_utc, tz_compare)
from pandas._libs.tslibs.timezones import UTC
from pandas._libs.tslibs.tzconversion cimport tz_convert_single
class NullFrequencyError(ValueError):
    """
    Raised when an operation needs a non-null `freq` but `freq` is null.

    Typical callers are `DatetimeIndex.shift`, `TimedeltaIndex.shift`
    and `PeriodIndex.shift`.
    """
def maybe_integer_op_deprecated(obj):
    """
    Emit a FutureWarning for integer add/sub on an object that has a freq.

    Nothing happens when ``obj.freq`` is None.

    Parameters
    ----------
    obj : object
        Anything exposing a ``freq`` attribute; its type name is embedded
        in the warning message.
    """
    # GH#22535 add/sub of integers and int-arrays is deprecated
    if obj.freq is None:
        return

    cls_name = type(obj).__name__
    message = (
        "Addition/subtraction of integers and integer-arrays "
        f"to {cls_name} is deprecated, "
        "will be removed in a future "
        "version. Instead of adding/subtracting `n`, use "
        "`n * self.freq`"
    )
    warnings.warn(message, FutureWarning)
cdef class _Timestamp(datetime):
    """
    C extension type subclassing ``datetime.datetime``.

    The methods below read ``self.value``, ``self.nanosecond``, ``self.freq``,
    ``self.freqstr`` and ``self.tz``.  None of these are declared in this
    block (presumably they come from the matching .pxd and/or the
    Python-level subclass -- confirm there).
    """

    # higher than np.ndarray and np.matrix
    __array_priority__ = 100

    def __hash__(_Timestamp self):
        """
        Return the hash of this timestamp.

        ``datetime.__hash__`` is used unless ``nanosecond`` is nonzero, in
        which case ``self.value`` is hashed instead.
        """
        if self.nanosecond:
            return hash(self.value)
        return datetime.__hash__(self)

    def __richcmp__(_Timestamp self, object other, int op):
        """
        Rich comparison against timestamps, datetimes, datetime64 and arrays.

        Parameters
        ----------
        other : object
        op : int
            One of the ``Py_LT``/``Py_LE``/``Py_EQ``/``Py_NE``/``Py_GT``/
            ``Py_GE`` comparison codes.

        Returns
        -------
        bool, the result of a delegated comparison, or NotImplemented

        Raises
        ------
        TypeError
            Via ``_assert_tzawareness_compat`` when exactly one of the two
            operands carries a tzinfo.
        """
        cdef:
            _Timestamp ots
            int ndim

        if isinstance(other, _Timestamp):
            ots = other
        elif other is NaT:
            # NaT compares unequal to everything
            return op == Py_NE
        elif PyDateTime_Check(other):
            if self.nanosecond == 0:
                # no sub-microsecond part: plain datetime comparison suffices
                val = self.to_pydatetime()
                return PyObject_RichCompareBool(val, other, op)

            try:
                ots = type(self)(other)
            except ValueError:
                # `other` could not be converted to type(self)
                return self._compare_outside_nanorange(other, op)
        else:
            ndim = getattr(other, "ndim", -1)

            if ndim != -1:
                if ndim == 0:
                    if is_datetime64_object(other):
                        other = type(self)(other)
                    elif is_array(other):
                        # zero-dim array, occurs if try comparison with
                        #  datetime64 scalar on the left hand side
                        # Unfortunately, for datetime64 values, other.item()
                        #  incorrectly returns an integer, so we need to use
                        #  the numpy C api to extract it.
                        other = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other),
                                                     other)
                        other = type(self)(other)
                    else:
                        return NotImplemented
                elif is_array(other):
                    # avoid recursion error GH#15183
                    return PyObject_RichCompare(np.array([self]), other, op)
                # delegate to `other` with the operator reflected
                return PyObject_RichCompare(other, self, reverse_ops[op])
            else:
                return NotImplemented

        self._assert_tzawareness_compat(other)
        return cmp_scalar(self.value, ots.value, op)

    def __reduce_ex__(self, protocol):
        """Pickle support; always defers to ``__reduce__``."""
        # python 3.6 compat
        # http://bugs.python.org/issue28730
        # now __reduce_ex__ is defined and higher priority than __reduce__
        return self.__reduce__()

    def __repr__(self) -> str:
        """
        Return ``Timestamp('<stamp>'[, tz='<zone>'][, freq='<freqstr>'])``.
        """
        stamp = self._repr_base
        zone = None

        try:
            stamp += self.strftime('%z')
            if self.tzinfo:
                zone = get_timezone(self.tzinfo)
        except ValueError:
            # strftime refused this date; take the offset text from a copy
            # whose year is replaced by 2000 instead
            year2000 = self.replace(year=2000)
            stamp += year2000.strftime('%z')
            if self.tzinfo:
                zone = get_timezone(self.tzinfo)

        try:
            stamp += zone.strftime(' %%Z')
        except AttributeError:
            # e.g. tzlocal has no `strftime`
            pass

        tz = f", tz='{zone}'" if zone is not None else ""
        freq = "" if self.freq is None else f", freq='{self.freqstr}'"

        return f"Timestamp('{stamp}'{tz}{freq})"

    cdef bint _compare_outside_nanorange(_Timestamp self, datetime other,
                                         int op) except -1:
        """
        Compare against a datetime that could not be converted to type(self).

        The comparison is carried out on ``datetime`` objects.  ``dtval`` is
        built by ``to_pydatetime``, which drops ``self.nanosecond``.

        Raises
        ------
        TypeError
            Via ``_assert_tzawareness_compat`` when exactly one of the two
            operands carries a tzinfo.
        """
        cdef:
            # warn=False: dropping the nanoseconds is intentional here and is
            # compensated for by the operator table below, so the
            # "Discarding nonzero nanoseconds" UserWarning would be noise.
            datetime dtval = self.to_pydatetime(warn=False)

        self._assert_tzawareness_compat(other)

        if self.nanosecond == 0:
            return PyObject_RichCompareBool(dtval, other, op)
        else:
            # With nonzero nanoseconds, `self` lies strictly after `dtval`
            # and cannot equal a microsecond-resolution `other`.  Hence
            # <= reduces to `dtval < other` and > reduces to `dtval >= other`.
            if op == Py_EQ:
                return False
            elif op == Py_NE:
                return True
            elif op == Py_LT:
                return dtval < other
            elif op == Py_LE:
                return dtval < other
            elif op == Py_GT:
                return dtval >= other
            elif op == Py_GE:
                return dtval >= other

    cdef _assert_tzawareness_compat(_Timestamp self, datetime other):
        """
        Raise TypeError if exactly one of ``self``/``other`` has a tzinfo.
        """
        if self.tzinfo is None:
            if other.tzinfo is not None:
                raise TypeError('Cannot compare tz-naive and tz-aware '
                                'timestamps')
        elif other.tzinfo is None:
            raise TypeError('Cannot compare tz-naive and tz-aware timestamps')

    cpdef datetime to_pydatetime(_Timestamp self, bint warn=True):
        """
        Convert a Timestamp object to a native Python datetime object.

        If warn=True, issue a warning if nanoseconds is nonzero.
        """
        if self.nanosecond != 0 and warn:
            warnings.warn("Discarding nonzero nanoseconds in conversion",
                          UserWarning, stacklevel=2)

        # the datetime constructor has no nanosecond field, so it is dropped
        return datetime(self.year, self.month, self.day,
                        self.hour, self.minute, self.second,
                        self.microsecond, self.tzinfo)

    cpdef to_datetime64(self):
        """
        Return a numpy.datetime64 object with 'ns' precision.
        """
        return np.datetime64(self.value, 'ns')

    def to_numpy(self, dtype=None, copy=False) -> np.datetime64:
        """
        Convert the Timestamp to a NumPy datetime64.

        .. versionadded:: 0.25.0

        This is an alias method for `Timestamp.to_datetime64()`. The dtype and
        copy parameters are available here only for compatibility. Their values
        will not affect the return value.

        Returns
        -------
        numpy.datetime64

        See Also
        --------
        DatetimeIndex.to_numpy : Similar method for DatetimeIndex.
        """
        return self.to_datetime64()

    def __add__(self, other):
        """
        Add a timedelta-like, integer, offset or array to this timestamp.

        Returns
        -------
        An instance of ``type(self)``, NaT, the result of
        ``self.freq * other + self`` for integer arrays, whatever
        ``datetime.__add__`` returns, or NotImplemented for objects that
        expose ``_typ``.

        Raises
        ------
        NullFrequencyError
            If an integer or integer-dtype array is added while
            ``self.freq`` is None.
        """
        cdef:
            int64_t other_int, nanos = 0

        if is_timedelta64_object(other):
            other_int = other.astype('timedelta64[ns]').view('i8')
            return type(self)(self.value + other_int, tz=self.tzinfo, freq=self.freq)

        elif is_integer_object(other):
            maybe_integer_op_deprecated(self)

            if self is NaT:
                # to be compat with Period
                return NaT
            elif self.freq is None:
                raise NullFrequencyError(
                    "Cannot add integral value to Timestamp without freq.")
            return type(self)((self.freq * other).apply(self), freq=self.freq)

        elif PyDelta_Check(other) or hasattr(other, 'delta'):
            # delta --> offsets.Tick
            # logic copied from delta_to_nanoseconds to prevent circular import
            if hasattr(other, 'nanos'):
                nanos = other.nanos
            elif hasattr(other, 'delta'):
                nanos = other.delta
            elif PyDelta_Check(other):
                nanos = (other.days * 24 * 60 * 60 * 1000000 +
                         other.seconds * 1000000 +
                         other.microseconds) * 1000

            result = type(self)(self.value + nanos, tz=self.tzinfo, freq=self.freq)
            return result

        elif is_array(other):
            if other.dtype.kind in ['i', 'u']:
                maybe_integer_op_deprecated(self)
                if self.freq is None:
                    raise NullFrequencyError(
                        "Cannot add integer-dtype array "
                        "to Timestamp without freq.")
                return self.freq * other + self
            # arrays of any other dtype fall through to datetime.__add__

        # index/series like
        elif hasattr(other, '_typ'):
            return NotImplemented

        result = datetime.__add__(self, other)
        if PyDateTime_Check(result):
            result = type(self)(result)
            # carry the nanosecond component over onto the rebuilt result
            result.nanosecond = self.nanosecond
        return result

    def __sub__(self, other):
        """
        Subtract a timedelta-like, integer, array or datetime-like.

        Timedelta-likes and integers are negated and routed through
        ``__add__``.  Datetime-like operands yield a ``Timedelta`` of the
        difference of the two ``value`` attributes.

        Raises
        ------
        NullFrequencyError
            If an integer-dtype array is subtracted while ``self.freq`` is
            None.
        TypeError
            If the two datetime-like operands fail ``tz_compare``.
        """
        if (is_timedelta64_object(other) or is_integer_object(other) or
                PyDelta_Check(other) or hasattr(other, 'delta')):
            # `delta` attribute is for offsets.Tick or offsets.Week obj
            neg_other = -other
            return self + neg_other

        elif is_array(other):
            if other.dtype.kind in ['i', 'u']:
                maybe_integer_op_deprecated(self)
                if self.freq is None:
                    raise NullFrequencyError(
                        "Cannot subtract integer-dtype array "
                        "from Timestamp without freq.")
                return self - self.freq * other

        typ = getattr(other, '_typ', None)
        if typ is not None:
            return NotImplemented

        if other is NaT:
            return NaT

        # coerce if necessary if we are a Timestamp-like
        if (PyDateTime_Check(self)
                and (PyDateTime_Check(other) or is_datetime64_object(other))):
            if isinstance(self, _Timestamp):
                other = type(self)(other)
            else:
                self = type(other)(self)

            # validate tz's
            if not tz_compare(self.tzinfo, other.tzinfo):
                raise TypeError("Timestamp subtraction must have the "
                                "same timezones or no timezones")

            # scalar Timestamp/datetime - Timestamp/datetime -> yields a
            # Timedelta
            from pandas._libs.tslibs.timedeltas import Timedelta
            try:
                return Timedelta(self.value - other.value)
            except (OverflowError, OutOfBoundsDatetime):
                # falls through to `return NotImplemented` below
                pass
        elif is_datetime64_object(self):
            # GH#28286 cython semantics for __rsub__, `other` is actually
            #  the Timestamp
            return type(other)(self) - other

        return NotImplemented

    cdef int64_t _maybe_convert_value_to_local(self):
        """Convert UTC i8 value to local i8 value if tz exists"""
        cdef:
            int64_t val
        val = self.value
        if self.tz is not None and not is_utc(self.tz):
            val = tz_convert_single(self.value, UTC, self.tz)
        return val

    cpdef bint _get_start_end_field(self, str field):
        """
        Evaluate a start/end flag named by ``field`` for this timestamp.

        Wraps the module-level ``get_start_end_field`` by passing it a
        one-element int64 array and returning the single result.
        """
        cdef:
            int64_t val
            dict kwds
            ndarray[uint8_t, cast=True] out
            int month_kw

        freq = self.freq
        if freq:
            kwds = freq.kwds
            # prefer 'startingMonth', then 'month', then 12
            month_kw = kwds.get('startingMonth', kwds.get('month', 12))
            freqstr = self.freqstr
        else:
            month_kw = 12
            freqstr = None

        val = self._maybe_convert_value_to_local()
        out = get_start_end_field(np.array([val], dtype=np.int64),
                                  field, freqstr, month_kw)
        return out[0]

    cpdef _get_date_name_field(self, object field, object locale):
        """
        Look up the name named by ``field`` for this timestamp.

        Wraps the module-level ``get_date_name_field`` by passing it a
        one-element int64 array and returning the single result.
        """
        cdef:
            int64_t val
            object[:] out

        val = self._maybe_convert_value_to_local()
        out = get_date_name_field(np.array([val], dtype=np.int64),
                                  field, locale=locale)
        return out[0]

    @property
    def _repr_base(self) -> str:
        """Date and time representations joined by a single space."""
        return f"{self._date_repr} {self._time_repr}"

    @property
    def _date_repr(self) -> str:
        """Date part formatted as ``year-MM-DD``."""
        # Ideal here would be self.strftime("%Y-%m-%d"), but
        # the datetime strftime() methods require year >= 1900
        return f'{self.year}-{self.month:02d}-{self.day:02d}'

    @property
    def _time_repr(self) -> str:
        """
        Time part formatted as ``HH:MM:SS`` plus a fractional suffix.

        The suffix has 9 digits when ``nanosecond`` is nonzero, 6 digits when
        only ``microsecond`` is nonzero, and is omitted otherwise.
        """
        result = f'{self.hour:02d}:{self.minute:02d}:{self.second:02d}'

        if self.nanosecond != 0:
            result += f'.{self.nanosecond + 1000 * self.microsecond:09d}'
        elif self.microsecond != 0:
            result += f'.{self.microsecond:06d}'

        return result

    @property
    def _short_repr(self) -> str:
        """Date-only representation when every time field is zero."""
        # format a Timestamp with only _date_repr if possible
        # otherwise _repr_base
        if (self.hour == 0 and
                self.minute == 0 and
                self.second == 0 and
                self.microsecond == 0 and
                self.nanosecond == 0):
            return self._date_repr
        return self._repr_base

    @property
    def asm8(self) -> np.datetime64:
        """
        Return numpy datetime64 format in nanoseconds.
        """
        return np.datetime64(self.value, 'ns')

    def timestamp(self):
        """Return POSIX timestamp as float."""
        # GH 17329
        # Note: Naive timestamps will not match datetime.stdlib
        return round(self.value / 1e9, 6)