diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index c7b774ad08673..245c554570ce4 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -23,7 +23,7 @@ cimport pandas._libs.util as util from pandas._libs.tslibs import Period, Timedelta from pandas._libs.tslibs.nattype cimport c_NaT as NaT -from pandas._libs.tslibs.c_timestamp cimport _Timestamp +from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.hashtable cimport HashTable @@ -378,7 +378,7 @@ cdef class DatetimeEngine(Int64Engine): cdef int64_t _unbox_scalar(self, scalar) except? -1: # NB: caller is responsible for ensuring tzawareness compat # before we get here - if not (isinstance(scalar, _Timestamp) or scalar is NaT): + if not (isinstance(scalar, ABCTimestamp) or scalar is NaT): raise TypeError(scalar) return scalar.value diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 556cab565860c..5547a0c179f86 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -26,7 +26,7 @@ from pandas._libs.util cimport ( is_integer_object, ) -from pandas._libs.tslibs.c_timestamp cimport _Timestamp +from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.np_datetime cimport ( _string_to_dts, @@ -617,7 +617,7 @@ cpdef array_to_datetime( 'datetime64 unless utc=True') else: iresult[i] = pydatetime_to_dt64(val, &dts) - if isinstance(val, _Timestamp): + if isinstance(val, ABCTimestamp): iresult[i] += val.nanosecond check_dts_bounds(&dts) diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 47b35aa6eabff..078832260cd58 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -21,8 +21,5 @@ from .np_datetime import OutOfBoundsDatetime from .period import IncompatibleFrequency, Period from .timedeltas import Timedelta, delta_to_nanoseconds, ints_to_pytimedelta -from .timestamps import Timestamp +from .timestamps import NullFrequencyError, Timestamp from .tzconversion import tz_convert_single - -# import fails if we do this before np_datetime -from .c_timestamp import NullFrequencyError # isort:skip diff --git a/pandas/_libs/tslibs/base.pxd b/pandas/_libs/tslibs/base.pxd new file mode 100644 index 0000000000000..d32413c3bad86 --- /dev/null +++ b/pandas/_libs/tslibs/base.pxd @@ -0,0 +1,20 @@ +from cpython.datetime cimport datetime, timedelta + +cdef class ABCTimedelta(timedelta): + pass + + +cdef class ABCTimestamp(datetime): + pass + + +cdef class ABCTick: + pass + + +cdef class ABCPeriod: + pass + + +cdef bint is_tick_object(object obj) +cdef bint is_period_object(object obj) diff --git a/pandas/_libs/tslibs/base.pyx b/pandas/_libs/tslibs/base.pyx new file mode 100644 index 0000000000000..59daba4510b6e --- /dev/null +++ b/pandas/_libs/tslibs/base.pyx @@ -0,0 +1,32 @@ +""" +We define base classes that will be inherited by Timestamp, Timedelta, etc +in order to allow for fast isinstance checks without circular dependency issues. + +This is analogous to core.dtypes.generic. +""" + +from cpython.datetime cimport datetime, timedelta + + +cdef class ABCTimedelta(timedelta): + pass + + +cdef class ABCTimestamp(datetime): + pass + + +cdef class ABCPeriod: + pass + + +cdef class ABCTick: + pass + + +cdef bint is_tick_object(object obj): + return isinstance(obj, ABCTick) + + +cdef bint is_period_object(object obj): + return isinstance(obj, ABCPeriod) diff --git a/pandas/_libs/tslibs/c_timestamp.pxd b/pandas/_libs/tslibs/c_timestamp.pxd deleted file mode 100644 index d095b6027d2f9..0000000000000 --- a/pandas/_libs/tslibs/c_timestamp.pxd +++ /dev/null @@ -1,17 +0,0 @@ -from cpython.datetime cimport datetime - -from numpy cimport int64_t - -cdef class _Timestamp(datetime): - cdef readonly: - int64_t value, nanosecond - object freq - list _date_attributes - cpdef bint _get_start_end_field(self, str field) - cpdef _get_date_name_field(self, object field, object locale) - cdef int64_t _maybe_convert_value_to_local(self) - cpdef to_datetime64(self) - cdef _assert_tzawareness_compat(_Timestamp self, datetime other) - cpdef datetime to_pydatetime(_Timestamp self, bint warn=*) - cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, - int op) except -1 diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx deleted file mode 100644 index 1e48f5445c1f6..0000000000000 --- a/pandas/_libs/tslibs/c_timestamp.pyx +++ /dev/null @@ -1,434 +0,0 @@ -""" -_Timestamp is a c-defined subclass of datetime.datetime - -It is separate from timestamps.pyx to prevent circular cimports - -This allows _Timestamp to be imported in other modules -so that isinstance(obj, _Timestamp) checks can be performed - -_Timestamp is PITA. Because we inherit from datetime, which has very specific -construction requirements, we need to do object instantiation in python -(see Timestamp class below). This will serve as a C extension type that -shadows the python class, where we do any heavy lifting. -""" - -import warnings - -from cpython.object cimport (PyObject_RichCompareBool, PyObject_RichCompare, - Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE) - -import numpy as np -cimport numpy as cnp -from numpy cimport int64_t, int8_t, uint8_t, ndarray -cnp.import_array() - -from cpython.datetime cimport (datetime, - PyDateTime_Check, PyDelta_Check, - PyDateTime_IMPORT) -PyDateTime_IMPORT - -from pandas._libs.tslibs.util cimport ( - is_datetime64_object, is_timedelta64_object, is_integer_object, - is_array) - -from pandas._libs.tslibs.fields import get_start_end_field, get_date_name_field -from pandas._libs.tslibs.nattype cimport c_NaT as NaT -from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime -from pandas._libs.tslibs.np_datetime cimport ( - reverse_ops, cmp_scalar) -from pandas._libs.tslibs.timezones cimport ( - get_timezone, is_utc, tz_compare, - utc_pytz as UTC, -) -from pandas._libs.tslibs.tzconversion cimport tz_convert_single -from pandas._libs.tslibs.offsets cimport is_tick_object - - -class NullFrequencyError(ValueError): - """ - Error raised when a null `freq` attribute is used in an operation - that needs a non-null frequency, particularly `DatetimeIndex.shift`, - `TimedeltaIndex.shift`, `PeriodIndex.shift`. - """ - pass - - -def integer_op_not_supported(obj): - # GH#22535 add/sub of integers and int-arrays is no longer allowed - # Note we return rather than raise the exception so we can raise in - # the caller; mypy finds this more palatable. - cls = type(obj).__name__ - - # GH#30886 using an fstring raises SystemError - int_addsub_msg = ( - "Addition/subtraction of integers and integer-arrays with {cls} is " - "no longer supported. Instead of adding/subtracting `n`, " - "use `n * obj.freq`" - ).format(cls=cls) - return TypeError(int_addsub_msg) - - -cdef class _Timestamp(datetime): - - # higher than np.ndarray and np.matrix - __array_priority__ = 100 - - def __hash__(_Timestamp self): - if self.nanosecond: - return hash(self.value) - return datetime.__hash__(self) - - def __richcmp__(_Timestamp self, object other, int op): - cdef: - _Timestamp ots - int ndim - - if isinstance(other, _Timestamp): - ots = other - elif other is NaT: - return op == Py_NE - elif PyDateTime_Check(other): - if self.nanosecond == 0: - val = self.to_pydatetime() - return PyObject_RichCompareBool(val, other, op) - - try: - ots = type(self)(other) - except ValueError: - return self._compare_outside_nanorange(other, op) - else: - ndim = getattr(other, "ndim", -1) - - if ndim != -1: - if ndim == 0: - if is_datetime64_object(other): - other = type(self)(other) - elif is_array(other): - # zero-dim array, occurs if try comparison with - # datetime64 scalar on the left hand side - # Unfortunately, for datetime64 values, other.item() - # incorrectly returns an integer, so we need to use - # the numpy C api to extract it. - other = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), - other) - other = type(self)(other) - else: - return NotImplemented - elif is_array(other): - # avoid recursion error GH#15183 - if other.dtype.kind == "M": - if self.tz is None: - return PyObject_RichCompare(self.asm8, other, op) - raise TypeError( - "Cannot compare tz-naive and tz-aware timestamps" - ) - if other.dtype.kind == "O": - # Operate element-wise - return np.array( - [PyObject_RichCompare(self, x, op) for x in other], - dtype=bool, - ) - return PyObject_RichCompare(np.array([self]), other, op) - return PyObject_RichCompare(other, self, reverse_ops[op]) - else: - return NotImplemented - - self._assert_tzawareness_compat(other) - return cmp_scalar(self.value, ots.value, op) - - def __reduce_ex__(self, protocol): - # python 3.6 compat - # https://bugs.python.org/issue28730 - # now __reduce_ex__ is defined and higher priority than __reduce__ - return self.__reduce__() - - def __repr__(self) -> str: - stamp = self._repr_base - zone = None - - try: - stamp += self.strftime('%z') - if self.tzinfo: - zone = get_timezone(self.tzinfo) - except ValueError: - year2000 = self.replace(year=2000) - stamp += year2000.strftime('%z') - if self.tzinfo: - zone = get_timezone(self.tzinfo) - - try: - stamp += zone.strftime(' %%Z') - except AttributeError: - # e.g. tzlocal has no `strftime` - pass - - tz = f", tz='{zone}'" if zone is not None else "" - freq = "" if self.freq is None else f", freq='{self.freqstr}'" - - return f"Timestamp('{stamp}'{tz}{freq})" - - cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, - int op) except -1: - cdef: - datetime dtval = self.to_pydatetime() - - self._assert_tzawareness_compat(other) - - if self.nanosecond == 0: - return PyObject_RichCompareBool(dtval, other, op) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - elif op == Py_LT: - return dtval < other - elif op == Py_LE: - return dtval < other - elif op == Py_GT: - return dtval >= other - elif op == Py_GE: - return dtval >= other - - cdef _assert_tzawareness_compat(_Timestamp self, datetime other): - if self.tzinfo is None: - if other.tzinfo is not None: - raise TypeError('Cannot compare tz-naive and tz-aware ' - 'timestamps') - elif other.tzinfo is None: - raise TypeError('Cannot compare tz-naive and tz-aware timestamps') - - cpdef datetime to_pydatetime(_Timestamp self, bint warn=True): - """ - Convert a Timestamp object to a native Python datetime object. - - If warn=True, issue a warning if nanoseconds is nonzero. - """ - if self.nanosecond != 0 and warn: - warnings.warn("Discarding nonzero nanoseconds in conversion", - UserWarning, stacklevel=2) - - return datetime(self.year, self.month, self.day, - self.hour, self.minute, self.second, - self.microsecond, self.tzinfo) - - cpdef to_datetime64(self): - """ - Return a numpy.datetime64 object with 'ns' precision. - """ - return np.datetime64(self.value, 'ns') - - def to_numpy(self, dtype=None, copy=False) -> np.datetime64: - """ - Convert the Timestamp to a NumPy datetime64. - - .. versionadded:: 0.25.0 - - This is an alias method for `Timestamp.to_datetime64()`. The dtype and - copy parameters are available here only for compatibility. Their values - will not affect the return value. - - Returns - ------- - numpy.datetime64 - - See Also - -------- - DatetimeIndex.to_numpy : Similar method for DatetimeIndex. - """ - return self.to_datetime64() - - def __add__(self, other): - cdef: - int64_t other_int, nanos = 0 - - if is_timedelta64_object(other): - other_int = other.astype('timedelta64[ns]').view('i8') - return type(self)(self.value + other_int, tz=self.tzinfo, freq=self.freq) - - elif is_integer_object(other): - raise integer_op_not_supported(self) - - elif PyDelta_Check(other): - # logic copied from delta_to_nanoseconds to prevent circular import - if hasattr(other, 'delta'): - # pd.Timedelta - nanos = other.value - elif PyDelta_Check(other): - nanos = (other.days * 24 * 60 * 60 * 1000000 + - other.seconds * 1000000 + - other.microseconds) * 1000 - - result = type(self)(self.value + nanos, tz=self.tzinfo, freq=self.freq) - return result - - elif is_tick_object(other): - try: - nanos = other.nanos - except OverflowError: - raise OverflowError( - f"the add operation between {other} and {self} will overflow" - ) - result = type(self)(self.value + nanos, tz=self.tzinfo, freq=self.freq) - return result - - elif is_array(other): - if other.dtype.kind in ['i', 'u']: - raise integer_op_not_supported(self) - if other.dtype.kind == "m": - if self.tz is None: - return self.asm8 + other - return np.asarray( - [self + other[n] for n in range(len(other))], - dtype=object, - ) - - return NotImplemented - - def __sub__(self, other): - - if (is_timedelta64_object(other) or is_integer_object(other) or - PyDelta_Check(other) or is_tick_object(other)): - neg_other = -other - return self + neg_other - - elif is_array(other): - if other.dtype.kind in ['i', 'u']: - raise integer_op_not_supported(self) - if other.dtype.kind == "m": - if self.tz is None: - return self.asm8 - other - return np.asarray( - [self - other[n] for n in range(len(other))], - dtype=object, - ) - return NotImplemented - - if other is NaT: - return NaT - - # coerce if necessary if we are a Timestamp-like - if (PyDateTime_Check(self) - and (PyDateTime_Check(other) or is_datetime64_object(other))): - # both_timestamps is to determine whether Timedelta(self - other) - # should raise the OOB error, or fall back returning a timedelta. - both_timestamps = (isinstance(other, _Timestamp) and - isinstance(self, _Timestamp)) - if isinstance(self, _Timestamp): - other = type(self)(other) - else: - self = type(other)(self) - - # validate tz's - if not tz_compare(self.tzinfo, other.tzinfo): - raise TypeError("Timestamp subtraction must have the " - "same timezones or no timezones") - - # scalar Timestamp/datetime - Timestamp/datetime -> yields a - # Timedelta - from pandas._libs.tslibs.timedeltas import Timedelta - try: - return Timedelta(self.value - other.value) - except (OverflowError, OutOfBoundsDatetime) as err: - if isinstance(other, _Timestamp): - if both_timestamps: - raise OutOfBoundsDatetime( - "Result is too large for pandas.Timedelta. Convert inputs " - "to datetime.datetime with 'Timestamp.to_pydatetime()' " - "before subtracting." - ) from err - # We get here in stata tests, fall back to stdlib datetime - # method and return stdlib timedelta object - pass - elif is_datetime64_object(self): - # GH#28286 cython semantics for __rsub__, `other` is actually - # the Timestamp - return type(other)(self) - other - - return NotImplemented - - cdef int64_t _maybe_convert_value_to_local(self): - """Convert UTC i8 value to local i8 value if tz exists""" - cdef: - int64_t val - val = self.value - if self.tz is not None and not is_utc(self.tz): - val = tz_convert_single(self.value, UTC, self.tz) - return val - - cpdef bint _get_start_end_field(self, str field): - cdef: - int64_t val - dict kwds - ndarray[uint8_t, cast=True] out - int month_kw - - freq = self.freq - if freq: - kwds = freq.kwds - month_kw = kwds.get('startingMonth', kwds.get('month', 12)) - freqstr = self.freqstr - else: - month_kw = 12 - freqstr = None - - val = self._maybe_convert_value_to_local() - out = get_start_end_field(np.array([val], dtype=np.int64), - field, freqstr, month_kw) - return out[0] - - cpdef _get_date_name_field(self, object field, object locale): - cdef: - int64_t val - object[:] out - - val = self._maybe_convert_value_to_local() - out = get_date_name_field(np.array([val], dtype=np.int64), - field, locale=locale) - return out[0] - - @property - def _repr_base(self) -> str: - return f"{self._date_repr} {self._time_repr}" - - @property - def _date_repr(self) -> str: - # Ideal here would be self.strftime("%Y-%m-%d"), but - # the datetime strftime() methods require year >= 1900 - return f'{self.year}-{self.month:02d}-{self.day:02d}' - - @property - def _time_repr(self) -> str: - result = f'{self.hour:02d}:{self.minute:02d}:{self.second:02d}' - - if self.nanosecond != 0: - result += f'.{self.nanosecond + 1000 * self.microsecond:09d}' - elif self.microsecond != 0: - result += f'.{self.microsecond:06d}' - - return result - - @property - def _short_repr(self) -> str: - # format a Timestamp with only _date_repr if possible - # otherwise _repr_base - if (self.hour == 0 and - self.minute == 0 and - self.second == 0 and - self.microsecond == 0 and - self.nanosecond == 0): - return self._date_repr - return self._repr_base - - @property - def asm8(self) -> np.datetime64: - """ - Return numpy datetime64 format in nanoseconds. - """ - return np.datetime64(self.value, 'ns') - - def timestamp(self): - """Return POSIX timestamp as float.""" - # GH 17329 - # Note: Naive timestamps will not match datetime.stdlib - return round(self.value / 1e9, 6) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 823be1205d43a..264861ef8a2d4 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -13,7 +13,7 @@ from cpython.datetime cimport (datetime, time, tzinfo, PyDateTime_IMPORT) PyDateTime_IMPORT -from pandas._libs.tslibs.c_timestamp cimport _Timestamp +from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.np_datetime cimport ( check_dts_bounds, npy_datetimestruct, pandas_datetime_to_datetimestruct, @@ -353,7 +353,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, offset = get_utcoffset(obj.tzinfo, ts) obj.value -= int(offset.total_seconds() * 1e9) - if isinstance(ts, _Timestamp): + if isinstance(ts, ABCTimestamp): obj.value += ts.nanosecond obj.dts.ps = ts.nanosecond * 1000 @@ -668,7 +668,7 @@ cpdef inline datetime localize_pydatetime(datetime dt, object tz): """ if tz is None: return dt - elif isinstance(dt, _Timestamp): + elif isinstance(dt, ABCTimestamp): return dt.tz_localize(tz) elif is_utc(tz): return _localize_pydatetime(dt, tz) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 393db82ad20aa..ff0d96f0f2525 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -21,6 +21,8 @@ cnp.import_array() from pandas._libs.tslibs cimport util from pandas._libs.tslibs.util cimport is_integer_object +from pandas._libs.tslibs.base cimport ABCTick, ABCTimestamp + from pandas._libs.tslibs.ccalendar import MONTHS, DAYS from pandas._libs.tslibs.ccalendar cimport get_days_in_month, dayofweek from pandas._libs.tslibs.conversion cimport ( @@ -32,7 +34,6 @@ from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, dtstruct_to_dt64, dt64_to_dtstruct) from pandas._libs.tslibs.timezones cimport utc_pytz as UTC from pandas._libs.tslibs.tzconversion cimport tz_convert_single -from pandas._libs.tslibs.c_timestamp cimport _Timestamp # --------------------------------------------------------------------- @@ -107,7 +108,7 @@ cdef to_offset(object obj): def as_datetime(obj: datetime) -> datetime: - if isinstance(obj, _Timestamp): + if isinstance(obj, ABCTimestamp): return obj.to_pydatetime() return obj @@ -116,7 +117,7 @@ cpdef bint is_normalized(datetime dt): if dt.hour != 0 or dt.minute != 0 or dt.second != 0 or dt.microsecond != 0: # Regardless of whether dt is datetime vs Timestamp return False - if isinstance(dt, _Timestamp): + if isinstance(dt, ABCTimestamp): return dt.nanosecond == 0 return True @@ -618,7 +619,7 @@ class BaseOffset(_BaseOffset): return -self + other -cdef class _Tick: +cdef class _Tick(ABCTick): """ dummy class to mix into tseries.offsets.Tick so that in tslibs.period we can do isinstance checks on _Tick and avoid importing tseries.offsets diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 40601733a454c..3c02cda651728 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -37,6 +37,8 @@ cdef extern from "src/datetime/np_datetime.h": cimport pandas._libs.tslibs.util as util +from pandas._libs.tslibs.base cimport ABCPeriod + from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.timezones cimport is_utc, is_tzlocal, get_dst_info from pandas._libs.tslibs.timedeltas import Timedelta @@ -1520,7 +1522,7 @@ class IncompatibleFrequency(ValueError): pass -cdef class _Period: +cdef class _Period(ABCPeriod): cdef readonly: int64_t ordinal diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 58600678c0938..9016df335e2c5 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -21,7 +21,7 @@ from pandas._libs.tslibs.util cimport ( is_float_object, is_array ) -from pandas._libs.tslibs.c_timestamp cimport _Timestamp +from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp, is_tick_object from pandas._libs.tslibs.ccalendar import DAY_SECONDS @@ -31,7 +31,7 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.nattype import nat_strings from pandas._libs.tslibs.nattype cimport ( checknull_with_nat, NPY_NAT, c_NaT as NaT) -from pandas._libs.tslibs.offsets cimport to_offset, is_tick_object +from pandas._libs.tslibs.offsets cimport to_offset # ---------------------------------------------------------------------- # Constants @@ -595,7 +595,7 @@ def _binary_op_method_timedeltalike(op, name): pass elif is_datetime64_object(other) or ( - PyDateTime_Check(other) and not isinstance(other, _Timestamp)): + PyDateTime_Check(other) and not isinstance(other, ABCTimestamp)): # this case is for a datetime object that is specifically # *not* a Timestamp, as the Timestamp case will be # handled after `_validate_ops_compat` returns False below @@ -753,7 +753,7 @@ cdef _to_py_int_float(v): # timedeltas that we need to do object instantiation in python. This will # serve as a C extension type that shadows the Python class, where we do any # heavy lifting. -cdef class _Timedelta(timedelta): +cdef class _Timedelta(ABCTimedelta): cdef readonly: int64_t value # nanoseconds object freq # frequency reference diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index 3cb4b6cd8113b..90f50e3af503c 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -1,6 +1,26 @@ +from cpython.datetime cimport datetime + from numpy cimport int64_t + +from pandas._libs.tslibs.base cimport ABCTimestamp from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct + cdef object create_timestamp_from_ts(int64_t value, npy_datetimestruct dts, object tz, object freq, bint fold) + + +cdef class _Timestamp(ABCTimestamp): + cdef readonly: + int64_t value, nanosecond + object freq + list _date_attributes + cpdef bint _get_start_end_field(self, str field) + cpdef _get_date_name_field(self, object field, object locale) + cdef int64_t _maybe_convert_value_to_local(self) + cpdef to_datetime64(self) + cdef _assert_tzawareness_compat(_Timestamp self, datetime other) + cpdef datetime to_pydatetime(_Timestamp self, bint warn=*) + cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, + int op) except -1 diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index f463491cf01c4..e656d654461c9 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1,34 +1,53 @@ +""" +_Timestamp is a c-defined subclass of datetime.datetime + +_Timestamp is PITA. Because we inherit from datetime, which has very specific +construction requirements, we need to do object instantiation in python +(see Timestamp class below). This will serve as a C extension type that +shadows the python class, where we do any heavy lifting. +""" import warnings import numpy as np cimport numpy as cnp -from numpy cimport int64_t +from numpy cimport int64_t, int8_t, uint8_t, ndarray cnp.import_array() +from cpython.object cimport (PyObject_RichCompareBool, PyObject_RichCompare, + Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE) + from cpython.datetime cimport (datetime, time, PyDateTime_Check, PyDelta_Check, PyTZInfo_Check, PyDateTime_IMPORT) PyDateTime_IMPORT from pandas._libs.tslibs.util cimport ( - is_datetime64_object, is_float_object, is_integer_object, is_offset_object) + is_datetime64_object, is_float_object, is_integer_object, is_offset_object, + is_timedelta64_object, is_array, +) + +from pandas._libs.tslibs.base cimport ABCTimestamp, is_tick_object -from pandas._libs.tslibs.c_timestamp cimport _Timestamp cimport pandas._libs.tslibs.ccalendar as ccalendar from pandas._libs.tslibs.ccalendar import DAY_SECONDS from pandas._libs.tslibs.conversion import normalize_i8_timestamps from pandas._libs.tslibs.conversion cimport ( _TSObject, convert_to_tsobject, convert_datetime_to_tsobject) +from pandas._libs.tslibs.fields import get_start_end_field, get_date_name_field from pandas._libs.tslibs.nattype cimport NPY_NAT, c_NaT as NaT from pandas._libs.tslibs.np_datetime cimport ( - check_dts_bounds, npy_datetimestruct, dt64_to_dtstruct) + check_dts_bounds, npy_datetimestruct, dt64_to_dtstruct, + reverse_ops, cmp_scalar, +) +from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.offsets cimport to_offset from pandas._libs.tslibs.timedeltas import Timedelta from pandas._libs.tslibs.timezones cimport ( - is_utc, maybe_get_tz, treat_tz_as_pytz, utc_pytz as UTC + is_utc, maybe_get_tz, treat_tz_as_pytz, utc_pytz as UTC, + get_timezone, tz_compare, ) -from pandas._libs.tslibs.tzconversion import ( - tz_localize_to_utc, tz_convert_single) +from pandas._libs.tslibs.tzconversion cimport tz_convert_single +from pandas._libs.tslibs.tzconversion import tz_localize_to_utc # ---------------------------------------------------------------------- # Constants @@ -162,6 +181,400 @@ def round_nsint64(values, mode, freq): raise ValueError("round_nsint64 called with an unrecognized rounding mode") +# ---------------------------------------------------------------------- + +class NullFrequencyError(ValueError): + """ + Error raised when a null `freq` attribute is used in an operation + that needs a non-null frequency, particularly `DatetimeIndex.shift`, + `TimedeltaIndex.shift`, `PeriodIndex.shift`. + """ + pass + + +def integer_op_not_supported(obj): + # GH#22535 add/sub of integers and int-arrays is no longer allowed + # Note we return rather than raise the exception so we can raise in + # the caller; mypy finds this more palatable. + cls = type(obj).__name__ + + # GH#30886 using an fstring raises SystemError + int_addsub_msg = ( + "Addition/subtraction of integers and integer-arrays with {cls} is " + "no longer supported. Instead of adding/subtracting `n`, " + "use `n * obj.freq`" + ).format(cls=cls) + return TypeError(int_addsub_msg) + + +# ---------------------------------------------------------------------- + +cdef class _Timestamp(ABCTimestamp): + + # higher than np.ndarray and np.matrix + __array_priority__ = 100 + + def __hash__(_Timestamp self): + if self.nanosecond: + return hash(self.value) + return datetime.__hash__(self) + + def __richcmp__(_Timestamp self, object other, int op): + cdef: + _Timestamp ots + int ndim + + if isinstance(other, _Timestamp): + ots = other + elif other is NaT: + return op == Py_NE + elif PyDateTime_Check(other): + if self.nanosecond == 0: + val = self.to_pydatetime() + return PyObject_RichCompareBool(val, other, op) + + try: + ots = type(self)(other) + except ValueError: + return self._compare_outside_nanorange(other, op) + else: + ndim = getattr(other, "ndim", -1) + + if ndim != -1: + if ndim == 0: + if is_datetime64_object(other): + other = type(self)(other) + elif is_array(other): + # zero-dim array, occurs if try comparison with + # datetime64 scalar on the left hand side + # Unfortunately, for datetime64 values, other.item() + # incorrectly returns an integer, so we need to use + # the numpy C api to extract it. + other = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), + other) + other = type(self)(other) + else: + return NotImplemented + elif is_array(other): + # avoid recursion error GH#15183 + if other.dtype.kind == "M": + if self.tz is None: + return PyObject_RichCompare(self.asm8, other, op) + raise TypeError( + "Cannot compare tz-naive and tz-aware timestamps" + ) + if other.dtype.kind == "O": + # Operate element-wise + return np.array( + [PyObject_RichCompare(self, x, op) for x in other], + dtype=bool, + ) + return PyObject_RichCompare(np.array([self]), other, op) + return PyObject_RichCompare(other, self, reverse_ops[op]) + else: + return NotImplemented + + self._assert_tzawareness_compat(other) + return cmp_scalar(self.value, ots.value, op) + + def __reduce_ex__(self, protocol): + # python 3.6 compat + # https://bugs.python.org/issue28730 + # now __reduce_ex__ is defined and higher priority than __reduce__ + return self.__reduce__() + + def __repr__(self) -> str: + stamp = self._repr_base + zone = None + + try: + stamp += self.strftime('%z') + if self.tzinfo: + zone = get_timezone(self.tzinfo) + except ValueError: + year2000 = self.replace(year=2000) + stamp += year2000.strftime('%z') + if self.tzinfo: + zone = get_timezone(self.tzinfo) + + try: + stamp += zone.strftime(' %%Z') + except AttributeError: + # e.g. tzlocal has no `strftime` + pass + + tz = f", tz='{zone}'" if zone is not None else "" + freq = "" if self.freq is None else f", freq='{self.freqstr}'" + + return f"Timestamp('{stamp}'{tz}{freq})" + + cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, + int op) except -1: + cdef: + datetime dtval = self.to_pydatetime() + + self._assert_tzawareness_compat(other) + + if self.nanosecond == 0: + return PyObject_RichCompareBool(dtval, other, op) + else: + if op == Py_EQ: + return False + elif op == Py_NE: + return True + elif op == Py_LT: + return dtval < other + elif op == Py_LE: + return dtval < other + elif op == Py_GT: + return dtval >= other + elif op == Py_GE: + return dtval >= other + + cdef _assert_tzawareness_compat(_Timestamp self, datetime other): + if self.tzinfo is None: + if other.tzinfo is not None: + raise TypeError('Cannot compare tz-naive and tz-aware ' + 'timestamps') + elif other.tzinfo is None: + raise TypeError('Cannot compare tz-naive and tz-aware timestamps') + + cpdef datetime to_pydatetime(_Timestamp self, bint warn=True): + """ + Convert a Timestamp object to a native Python datetime object. + + If warn=True, issue a warning if nanoseconds is nonzero. + """ + if self.nanosecond != 0 and warn: + warnings.warn("Discarding nonzero nanoseconds in conversion", + UserWarning, stacklevel=2) + + return datetime(self.year, self.month, self.day, + self.hour, self.minute, self.second, + self.microsecond, self.tzinfo) + + cpdef to_datetime64(self): + """ + Return a numpy.datetime64 object with 'ns' precision. + """ + return np.datetime64(self.value, 'ns') + + def to_numpy(self, dtype=None, copy=False) -> np.datetime64: + """ + Convert the Timestamp to a NumPy datetime64. + + .. versionadded:: 0.25.0 + + This is an alias method for `Timestamp.to_datetime64()`. The dtype and + copy parameters are available here only for compatibility. Their values + will not affect the return value. + + Returns + ------- + numpy.datetime64 + + See Also + -------- + DatetimeIndex.to_numpy : Similar method for DatetimeIndex. + """ + return self.to_datetime64() + + def __add__(self, other): + cdef: + int64_t other_int, nanos = 0 + + if is_timedelta64_object(other): + other_int = other.astype('timedelta64[ns]').view('i8') + return type(self)(self.value + other_int, tz=self.tzinfo, freq=self.freq) + + elif is_integer_object(other): + raise integer_op_not_supported(self) + + elif PyDelta_Check(other): + # logic copied from delta_to_nanoseconds to prevent circular import + if hasattr(other, 'delta'): + # pd.Timedelta + nanos = other.value + elif PyDelta_Check(other): + nanos = (other.days * 24 * 60 * 60 * 1000000 + + other.seconds * 1000000 + + other.microseconds) * 1000 + + result = type(self)(self.value + nanos, tz=self.tzinfo, freq=self.freq) + return result + + elif is_tick_object(other): + try: + nanos = other.nanos + except OverflowError: + raise OverflowError( + f"the add operation between {other} and {self} will overflow" + ) + result = type(self)(self.value + nanos, tz=self.tzinfo, freq=self.freq) + return result + + elif is_array(other): + if other.dtype.kind in ['i', 'u']: + raise integer_op_not_supported(self) + if other.dtype.kind == "m": + if self.tz is None: + return self.asm8 + other + return np.asarray( + [self + other[n] for n in range(len(other))], + dtype=object, + ) + + return NotImplemented + + def __sub__(self, other): + + if (is_timedelta64_object(other) or is_integer_object(other) or + PyDelta_Check(other) or is_tick_object(other)): + neg_other = -other + return self + neg_other + + elif is_array(other): + if other.dtype.kind in ['i', 'u']: + raise integer_op_not_supported(self) + if other.dtype.kind == "m": + if self.tz is None: + return self.asm8 - other + return np.asarray( + [self - other[n] for n in range(len(other))], + dtype=object, + ) + return NotImplemented + + if other is NaT: + return NaT + + # coerce if necessary if we are a Timestamp-like + if (PyDateTime_Check(self) + and (PyDateTime_Check(other) or is_datetime64_object(other))): + # both_timestamps is to determine whether Timedelta(self - other) + # should raise the OOB error, or fall back returning a timedelta. + both_timestamps = (isinstance(other, _Timestamp) and + isinstance(self, _Timestamp)) + if isinstance(self, _Timestamp): + other = type(self)(other) + else: + self = type(other)(self) + + # validate tz's + if not tz_compare(self.tzinfo, other.tzinfo): + raise TypeError("Timestamp subtraction must have the " + "same timezones or no timezones") + + # scalar Timestamp/datetime - Timestamp/datetime -> yields a + # Timedelta + from pandas._libs.tslibs.timedeltas import Timedelta + try: + return Timedelta(self.value - other.value) + except (OverflowError, OutOfBoundsDatetime) as err: + if isinstance(other, _Timestamp): + if both_timestamps: + raise OutOfBoundsDatetime( + "Result is too large for pandas.Timedelta. Convert inputs " + "to datetime.datetime with 'Timestamp.to_pydatetime()' " + "before subtracting." + ) from err + # We get here in stata tests, fall back to stdlib datetime + # method and return stdlib timedelta object + pass + elif is_datetime64_object(self): + # GH#28286 cython semantics for __rsub__, `other` is actually + # the Timestamp + return type(other)(self) - other + + return NotImplemented + + cdef int64_t _maybe_convert_value_to_local(self): + """Convert UTC i8 value to local i8 value if tz exists""" + cdef: + int64_t val + val = self.value + if self.tz is not None and not is_utc(self.tz): + val = tz_convert_single(self.value, UTC, self.tz) + return val + + cpdef bint _get_start_end_field(self, str field): + cdef: + int64_t val + dict kwds + ndarray[uint8_t, cast=True] out + int month_kw + + freq = self.freq + if freq: + kwds = freq.kwds + month_kw = kwds.get('startingMonth', kwds.get('month', 12)) + freqstr = self.freqstr + else: + month_kw = 12 + freqstr = None + + val = self._maybe_convert_value_to_local() + out = get_start_end_field(np.array([val], dtype=np.int64), + field, freqstr, month_kw) + return out[0] + + cpdef _get_date_name_field(self, object field, object locale): + cdef: + int64_t val + object[:] out + + val = self._maybe_convert_value_to_local() + out = get_date_name_field(np.array([val], dtype=np.int64), + field, locale=locale) + return out[0] + + @property + def _repr_base(self) -> str: + return f"{self._date_repr} {self._time_repr}" + + @property + def _date_repr(self) -> str: + # Ideal here would be self.strftime("%Y-%m-%d"), but + # the datetime strftime() methods require year >= 1900 + return f'{self.year}-{self.month:02d}-{self.day:02d}' + + @property + def _time_repr(self) -> str: + result = f'{self.hour:02d}:{self.minute:02d}:{self.second:02d}' + + if self.nanosecond != 0: + result += f'.{self.nanosecond + 1000 * self.microsecond:09d}' + elif self.microsecond != 0: + result += f'.{self.microsecond:06d}' + + return result + + @property + def _short_repr(self) -> str: + # format a Timestamp with only _date_repr if possible + # otherwise _repr_base + if (self.hour == 0 and + self.minute == 0 and + self.second == 0 and + self.microsecond == 0 and + self.nanosecond == 0): + return self._date_repr + return self._repr_base + + @property + def asm8(self) -> np.datetime64: + """ + Return numpy datetime64 format in nanoseconds. + """ + return np.datetime64(self.value, 'ns') + + def timestamp(self): + """Return POSIX timestamp as float.""" + # GH 17329 + # Note: Naive timestamps will not match datetime.stdlib + return round(self.value / 1e9, 6) + + # ---------------------------------------------------------------------- # Python front end to C extension type _Timestamp diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3aeec70ab72d7..a3d893d68ff85 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -6,10 +6,13 @@ import numpy as np from pandas._libs import NaT, NaTType, Timestamp, algos, iNaT, lib -from pandas._libs.tslibs.c_timestamp import integer_op_not_supported from pandas._libs.tslibs.period import DIFFERENT_FREQ, IncompatibleFrequency, Period from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds -from pandas._libs.tslibs.timestamps import RoundTo, round_nsint64 +from pandas._libs.tslibs.timestamps import ( + RoundTo, + integer_op_not_supported, + round_nsint64, +) from pandas._typing import DatetimeLikeScalar, DtypeObj from pandas.compat import set_function_name from pandas.compat.numpy import function as nv diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index a4c2e3f0787d0..8356b136359d0 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -6,7 +6,7 @@ def test_namespace(): submodules = [ - "c_timestamp", + "base", "ccalendar", "conversion", "fields", diff --git a/setup.py b/setup.py index 58f3fb5706ad1..63510867f0dd7 100755 --- a/setup.py +++ b/setup.py @@ -307,6 +307,7 @@ class CheckSDist(sdist_class): "pandas/_libs/sparse.pyx", "pandas/_libs/ops.pyx", "pandas/_libs/parsers.pyx", + "pandas/_libs/tslibs/base.pyx", "pandas/_libs/tslibs/c_timestamp.pyx", "pandas/_libs/tslibs/ccalendar.pyx", "pandas/_libs/tslibs/period.pyx", @@ -602,10 +603,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.reshape": {"pyxfile": "_libs/reshape", "depends": []}, "_libs.sparse": {"pyxfile": "_libs/sparse", "depends": _pxi_dep["sparse"]}, "_libs.tslib": {"pyxfile": "_libs/tslib", "depends": tseries_depends}, - "_libs.tslibs.c_timestamp": { - "pyxfile": "_libs/tslibs/c_timestamp", - "depends": tseries_depends, - }, + "_libs.tslibs.base": {"pyxfile": "_libs/tslibs/base"}, "_libs.tslibs.ccalendar": {"pyxfile": "_libs/tslibs/ccalendar"}, "_libs.tslibs.conversion": { "pyxfile": "_libs/tslibs/conversion",