diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 7bb1c45998eb2..eb8821382037d 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from datetime import datetime, timedelta import operator import warnings @@ -8,7 +9,7 @@ from pandas._libs.tslibs import timezones from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta from pandas._libs.tslibs.period import ( - DIFFERENT_FREQ_INDEX, IncompatibleFrequency) + Period, DIFFERENT_FREQ_INDEX, IncompatibleFrequency) from pandas.errors import NullFrequencyError, PerformanceWarning from pandas import compat @@ -19,6 +20,13 @@ from pandas.core.dtypes.common import ( needs_i8_conversion, is_list_like, + is_offsetlike, + is_extension_array_dtype, + is_datetime64_dtype, + is_datetime64_any_dtype, + is_datetime64tz_dtype, + is_float_dtype, + is_integer_dtype, is_bool_dtype, is_period_dtype, is_timedelta64_dtype, @@ -100,7 +108,7 @@ class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin): _freq and that the inheriting class has methods: - _validate_frequency + _generate_range """ @property @@ -132,6 +140,14 @@ def asi8(self): # ------------------------------------------------------------------ # Array-like Methods + @property + def shape(self): + return (len(self),) + + @property + def size(self): + return np.prod(self.shape) + def __len__(self): return len(self._data) @@ -296,6 +312,34 @@ def resolution(self): """ return frequencies.Resolution.get_str(self._resolution) + @classmethod + def _validate_frequency(cls, index, freq, **kwargs): + """ + Validate that a frequency is compatible with the values of a given + Datetime Array/Index or Timedelta Array/Index + + Parameters + ---------- + index : DatetimeIndex or TimedeltaIndex + The index on which to determine if the given frequency is valid + freq : DateOffset + The frequency to validate + """ + if is_period_dtype(cls): + # Frequency validation is not meaningful for Period Array/Index + return None + + inferred = index.inferred_freq + if index.size == 0 or inferred == freq.freqstr: + return None + + on_freq = cls._generate_range(start=index[0], end=None, + periods=len(index), freq=freq, **kwargs) + if not np.array_equal(index.asi8, on_freq.asi8): + raise ValueError('Inferred frequency {infer} from passed values ' + 'does not conform to passed frequency {passed}' + .format(infer=inferred, passed=freq.freqstr)) + # ------------------------------------------------------------------ # Arithmetic Methods @@ -477,6 +521,188 @@ def _addsub_offset_array(self, other, op): kwargs['freq'] = 'infer' return type(self)(res_values, **kwargs) + def shift(self, n, freq=None): + """ + Specialized shift which produces a Datetime/Timedelta Array/Index + + Parameters + ---------- + n : int + Periods to shift by + freq : DateOffset or timedelta-like, optional + + Returns + ------- + shifted : same type as self + """ + if freq is not None and freq != self.freq: + if isinstance(freq, compat.string_types): + freq = frequencies.to_offset(freq) + offset = n * freq + result = self + offset + + if hasattr(self, 'tz'): + result._tz = self.tz + + return result + + if n == 0: + # immutable so OK + return self + + if self.freq is None: + raise NullFrequencyError("Cannot shift with no freq") + + start = self[0] + n * self.freq + end = self[-1] + n * self.freq + attribs = self._get_attributes_dict() + return self._generate_range(start=start, end=end, periods=None, + **attribs) + + @classmethod + 
def _add_datetimelike_methods(cls): + """ + add in the datetimelike methods (as we may have to override the + superclass) + """ + + def __add__(self, other): + other = lib.item_from_zerodim(other) + if isinstance(other, (ABCSeries, ABCDataFrame)): + return NotImplemented + + # scalar others + elif other is NaT: + result = self._add_nat() + elif isinstance(other, (Tick, timedelta, np.timedelta64)): + result = self._add_delta(other) + elif isinstance(other, DateOffset): + # specifically _not_ a Tick + result = self._add_offset(other) + elif isinstance(other, (datetime, np.datetime64)): + result = self._add_datelike(other) + elif lib.is_integer(other): + # This check must come after the check for np.timedelta64 + # as is_integer returns True for these + result = self.shift(other) + + # array-like others + elif is_timedelta64_dtype(other): + # TimedeltaIndex, ndarray[timedelta64] + result = self._add_delta(other) + elif is_offsetlike(other): + # Array/Index of DateOffset objects + result = self._addsub_offset_array(other, operator.add) + elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): + # DatetimeIndex, ndarray[datetime64] + return self._add_datelike(other) + elif is_integer_dtype(other): + result = self._addsub_int_array(other, operator.add) + elif is_float_dtype(other) or is_period_dtype(other): + # Explicitly catch invalid dtypes + raise TypeError("cannot add {dtype}-dtype to {cls}" + .format(dtype=other.dtype, + cls=type(self).__name__)) + elif is_extension_array_dtype(other): + # Categorical op will raise; defer explicitly + return NotImplemented + else: # pragma: no cover + return NotImplemented + + return result + + cls.__add__ = __add__ + + def __radd__(self, other): + # alias for __add__ + return self.__add__(other) + cls.__radd__ = __radd__ + + def __sub__(self, other): + other = lib.item_from_zerodim(other) + if isinstance(other, (ABCSeries, ABCDataFrame)): + return NotImplemented + + # scalar others + elif other is NaT: + result = self._sub_nat() + elif isinstance(other, (Tick, timedelta, np.timedelta64)): + result = self._add_delta(-other) + elif isinstance(other, DateOffset): + # specifically _not_ a Tick + result = self._add_offset(-other) + elif isinstance(other, (datetime, np.datetime64)): + result = self._sub_datelike(other) + elif lib.is_integer(other): + # This check must come after the check for np.timedelta64 + # as is_integer returns True for these + result = self.shift(-other) + elif isinstance(other, Period): + result = self._sub_period(other) + + # array-like others + elif is_timedelta64_dtype(other): + # TimedeltaIndex, ndarray[timedelta64] + result = self._add_delta(-other) + elif is_offsetlike(other): + # Array/Index of DateOffset objects + result = self._addsub_offset_array(other, operator.sub) + elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): + # DatetimeIndex, ndarray[datetime64] + result = self._sub_datelike(other) + elif is_period_dtype(other): + # PeriodIndex + result = self._sub_period_array(other) + elif is_integer_dtype(other): + result = self._addsub_int_array(other, operator.sub) + elif isinstance(other, ABCIndexClass): + raise TypeError("cannot subtract {cls} and {typ}" + .format(cls=type(self).__name__, + typ=type(other).__name__)) + elif is_float_dtype(other): + # Explicitly catch invalid dtypes + raise TypeError("cannot subtract {dtype}-dtype from {cls}" + .format(dtype=other.dtype, + cls=type(self).__name__)) + elif is_extension_array_dtype(other): + # Categorical op will raise; defer explicitly + return 
NotImplemented + else: # pragma: no cover + return NotImplemented + + return result + + cls.__sub__ = __sub__ + + def __rsub__(self, other): + if is_datetime64_dtype(other) and is_timedelta64_dtype(self): + # ndarray[datetime64] cannot be subtracted from self, so + # we need to wrap in DatetimeArray/Index and flip the operation + if not isinstance(other, DatetimeLikeArrayMixin): + # Avoid down-casting DatetimeIndex + from pandas.core.arrays import DatetimeArrayMixin + other = DatetimeArrayMixin(other) + return other - self + elif (is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and + not is_datetime64_any_dtype(other)): + # GH#19959 datetime - datetime is well-defined as timedelta, + # but any other type - datetime is not well-defined. + raise TypeError("cannot subtract {cls} from {typ}" + .format(cls=type(self).__name__, + typ=type(other).__name__)) + return -(self - other) + cls.__rsub__ = __rsub__ + + def __iadd__(self, other): + # alias for __add__ + return self.__add__(other) + cls.__iadd__ = __iadd__ + + def __isub__(self, other): + # alias for __sub__ + return self.__sub__(other) + cls.__isub__ = __isub__ + # -------------------------------------------------------------- # Comparison Methods diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 29f97b344f267..00d53ad82b2dc 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from datetime import datetime, timedelta +from datetime import datetime, timedelta, time import warnings import numpy as np @@ -8,11 +8,12 @@ from pandas._libs import tslib from pandas._libs.tslib import Timestamp, NaT, iNaT from pandas._libs.tslibs import ( + normalize_date, conversion, fields, timezones, resolution as libresolution) from pandas.util._decorators import cache_readonly -from pandas.errors import PerformanceWarning +from pandas.errors import PerformanceWarning, AbstractMethodError from pandas import compat from pandas.core.dtypes.common import ( @@ -30,11 +31,14 @@ from pandas.core.algorithms import checked_add_with_arr from pandas.tseries.frequencies import to_offset -from pandas.tseries.offsets import Tick +from pandas.tseries.offsets import Tick, Day, generate_range from pandas.core.arrays import datetimelike as dtl +_midnight = time(0, 0) + + def _to_m8(key, tz=None): """ Timestamp-like => dt64 @@ -177,13 +181,16 @@ def _simple_new(cls, values, freq=None, tz=None, **kwargs): result._tz = timezones.tz_standardize(tz) return result - def __new__(cls, values, freq=None, tz=None): + def __new__(cls, values, freq=None, tz=None, dtype=None): if tz is None and hasattr(values, 'tz'): # e.g. 
DatetimeIndex tz = values.tz freq, freq_infer = dtl.maybe_infer_freq(freq) + # if dtype has an embedded tz, capture it + tz = dtl.validate_tz_from_dtype(dtype, tz) + result = cls._simple_new(values, freq=freq, tz=tz) if freq_infer: inferred = result.inferred_freq @@ -194,6 +201,117 @@ def __new__(cls, values, freq=None, tz=None): # constructor, this does not call _deepcopy_if_needed return result + @classmethod + def _generate_range(cls, start, end, periods, freq, tz=None, + normalize=False, ambiguous='raise', closed=None): + if com.count_not_none(start, end, periods, freq) != 3: + raise ValueError('Of the four parameters: start, end, periods, ' + 'and freq, exactly three must be specified') + freq = to_offset(freq) + + if start is not None: + start = Timestamp(start) + + if end is not None: + end = Timestamp(end) + + if start is None and end is None: + if closed is not None: + raise ValueError("Closed has to be None if not both of start" + "and end are defined") + + left_closed, right_closed = dtl.validate_endpoints(closed) + + start, end, _normalized = _maybe_normalize_endpoints(start, end, + normalize) + + tz, inferred_tz = _infer_tz_from_endpoints(start, end, tz) + + if hasattr(freq, 'delta') and freq != Day(): + # sub-Day Tick + if inferred_tz is None and tz is not None: + # naive dates + if start is not None and start.tz is None: + start = start.tz_localize(tz, ambiguous=False) + + if end is not None and end.tz is None: + end = end.tz_localize(tz, ambiguous=False) + + if start and end: + if start.tz is None and end.tz is not None: + start = start.tz_localize(end.tz, ambiguous=False) + + if end.tz is None and start.tz is not None: + end = end.tz_localize(start.tz, ambiguous=False) + + if cls._use_cached_range(freq, _normalized, start, end): + index = cls._cached_range(start, end, periods=periods, + freq=freq) + else: + index = _generate_regular_range(cls, start, end, periods, freq) + + else: + + if tz is not None: + # naive dates + if start is not None and start.tz is not None: + start = start.replace(tzinfo=None) + + if end is not None and end.tz is not None: + end = end.replace(tzinfo=None) + + if start and end: + if start.tz is None and end.tz is not None: + end = end.replace(tzinfo=None) + + if end.tz is None and start.tz is not None: + start = start.replace(tzinfo=None) + + if freq is not None: + if cls._use_cached_range(freq, _normalized, start, end): + index = cls._cached_range(start, end, periods=periods, + freq=freq) + else: + index = _generate_regular_range(cls, start, end, + periods, freq) + + if tz is not None and getattr(index, 'tz', None) is None: + arr = conversion.tz_localize_to_utc( + ensure_int64(index.values), + tz, ambiguous=ambiguous) + + index = cls(arr) + + # index is localized datetime64 array -> have to convert + # start/end as well to compare + if start is not None: + start = start.tz_localize(tz).asm8 + if end is not None: + end = end.tz_localize(tz).asm8 + else: + # Create a linearly spaced date_range in local time + start = start.tz_localize(tz) + end = end.tz_localize(tz) + arr = np.linspace(start.value, end.value, periods) + index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz) + + if not left_closed and len(index) and index[0] == start: + index = index[1:] + if not right_closed and len(index) and index[-1] == end: + index = index[:-1] + + return cls._simple_new(index.values, freq=freq, tz=tz) + + @classmethod + def _use_cached_range(cls, freq, _normalized, start, end): + # DatetimeArray is mutable, so is not cached + return False + + 
@classmethod + def _cached_range(cls, start=None, end=None, + periods=None, freq=None, **kwargs): + raise AbstractMethodError(cls) + # ----------------------------------------------------------------- # Descriptive Properties @@ -1085,3 +1203,109 @@ def to_julian_date(self): DatetimeArrayMixin._add_comparison_ops() +DatetimeArrayMixin._add_datetimelike_methods() + + +def _generate_regular_range(cls, start, end, periods, freq): + if isinstance(freq, Tick): + stride = freq.nanos + if periods is None: + b = Timestamp(start).value + # cannot just use e = Timestamp(end) + 1 because arange breaks when + # stride is too large, see GH10887 + e = (b + (Timestamp(end).value - b) // stride * stride + + stride // 2 + 1) + # end.tz == start.tz by this point due to _generate implementation + tz = start.tz + elif start is not None: + b = Timestamp(start).value + e = b + np.int64(periods) * stride + tz = start.tz + elif end is not None: + e = Timestamp(end).value + stride + b = e - np.int64(periods) * stride + tz = end.tz + else: + raise ValueError("at least 'start' or 'end' should be specified " + "if a 'period' is given.") + + data = np.arange(b, e, stride, dtype=np.int64) + data = cls._simple_new(data.view(_NS_DTYPE), None, tz=tz) + else: + tz = None + if isinstance(start, Timestamp): + tz = start.tz + start = start.to_pydatetime() + + if isinstance(end, Timestamp): + tz = end.tz + end = end.to_pydatetime() + + xdr = generate_range(start=start, end=end, + periods=periods, offset=freq) + + values = np.array([x.value for x in xdr]) + data = cls._simple_new(values, freq=freq, tz=tz) + + return data + + +def _infer_tz_from_endpoints(start, end, tz): + """ + If a timezone is not explicitly given via `tz`, see if one can + be inferred from the `start` and `end` endpoints. If more than one + of these inputs provides a timezone, require that they all agree. 
+ + Parameters + ---------- + start : Timestamp + end : Timestamp + tz : tzinfo or None + + Returns + ------- + tz : tzinfo or None + inferred_tz : tzinfo or None + + Raises + ------ + TypeError : if start and end timezones do not agree + """ + try: + inferred_tz = timezones.infer_tzinfo(start, end) + except Exception: + raise TypeError('Start and end cannot both be tz-aware with ' + 'different timezones') + + inferred_tz = timezones.maybe_get_tz(inferred_tz) + tz = timezones.maybe_get_tz(tz) + + if tz is not None and inferred_tz is not None: + if not timezones.tz_compare(inferred_tz, tz): + raise AssertionError("Inferred time zone not equal to passed " + "time zone") + + elif inferred_tz is not None: + tz = inferred_tz + + return tz, inferred_tz + + +def _maybe_normalize_endpoints(start, end, normalize): + _normalized = True + + if start is not None: + if normalize: + start = normalize_date(start) + _normalized = True + else: + _normalized = _normalized and start.time() == _midnight + + if end is not None: + if normalize: + end = normalize_date(end) + _normalized = True + else: + _normalized = _normalized and end.time() == _midnight + + return start, end, _normalized diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 9c98f73312dbf..481d5313f0e25 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -386,6 +386,7 @@ def _maybe_convert_timedelta(self, other): PeriodArrayMixin._add_comparison_ops() +PeriodArrayMixin._add_datetimelike_methods() # ------------------------------------------------------------------- diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index cc93644677463..df9e57cb5f0e1 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -4,7 +4,7 @@ import numpy as np from pandas._libs import tslibs -from pandas._libs.tslibs import Timedelta, NaT +from pandas._libs.tslibs import Timedelta, Timestamp, NaT, iNaT from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import array_to_timedelta64 @@ -16,6 +16,7 @@ from pandas.core.dtypes.missing import isna import pandas.core.common as com +from pandas.core.algorithms import checked_add_with_arr from pandas.tseries.offsets import Tick from pandas.tseries.frequencies import to_offset @@ -230,6 +231,36 @@ def _add_delta(self, delta): return type(self)(new_values, freq='infer') + def _add_datelike(self, other): + # adding a timedeltaindex to a datetimelike + from pandas.core.arrays import DatetimeArrayMixin + if isinstance(other, (DatetimeArrayMixin, np.ndarray)): + # if other is an ndarray, we assume it is datetime64-dtype + # defer to implementation in DatetimeIndex + if not isinstance(other, DatetimeArrayMixin): + other = DatetimeArrayMixin(other) + return other + self + else: + assert other is not NaT + other = Timestamp(other) + i8 = self.asi8 + result = checked_add_with_arr(i8, other.value, + arr_mask=self._isnan) + result = self._maybe_mask_results(result, fill_value=iNaT) + return DatetimeArrayMixin(result) + + def _addsub_offset_array(self, other, op): + # Add or subtract Array-like of DateOffset objects + try: + # TimedeltaIndex can only operate with a subset of DateOffset + # subclasses. 
Incompatible classes will raise AttributeError, + # which we re-raise as TypeError + return dtl.DatetimeLikeArrayMixin._addsub_offset_array(self, other, + op) + except AttributeError: + raise TypeError("Cannot add/subtract non-tick DateOffset to {cls}" + .format(cls=type(self).__name__)) + def _evaluate_with_timedelta_like(self, other, op): if isinstance(other, ABCSeries): # GH#19042 @@ -370,6 +401,7 @@ def f(x): TimedeltaArrayMixin._add_comparison_ops() +TimedeltaArrayMixin._add_datetimelike_methods() # --------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 20926ea5163af..f09fe8c8abdcf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -274,6 +274,26 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, return IntervalIndex(data, dtype=dtype, name=name, copy=copy, closed=closed) + elif (is_datetime64_any_dtype(data) or + (dtype is not None and is_datetime64_any_dtype(dtype)) or + 'tz' in kwargs): + from pandas import DatetimeIndex + result = DatetimeIndex(data, copy=copy, name=name, + dtype=dtype, **kwargs) + if dtype is not None and is_dtype_equal(_o_dtype, dtype): + return Index(result.to_pydatetime(), dtype=_o_dtype) + else: + return result + + elif (is_timedelta64_dtype(data) or + (dtype is not None and is_timedelta64_dtype(dtype))): + from pandas import TimedeltaIndex + result = TimedeltaIndex(data, copy=copy, name=name, **kwargs) + if dtype is not None and _o_dtype == dtype: + return Index(result.to_pytimedelta(), dtype=_o_dtype) + else: + return result + # extension dtype elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype): data = np.asarray(data) @@ -290,27 +310,6 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, # index-like elif isinstance(data, (np.ndarray, Index, ABCSeries)): - - if (is_datetime64_any_dtype(data) or - (dtype is not None and is_datetime64_any_dtype(dtype)) or - 'tz' in kwargs): - from pandas import DatetimeIndex - result = DatetimeIndex(data, copy=copy, name=name, - dtype=dtype, **kwargs) - if dtype is not None and is_dtype_equal(_o_dtype, dtype): - return Index(result.to_pydatetime(), dtype=_o_dtype) - else: - return result - - elif (is_timedelta64_dtype(data) or - (dtype is not None and is_timedelta64_dtype(dtype))): - from pandas import TimedeltaIndex - result = TimedeltaIndex(data, copy=copy, name=name, **kwargs) - if dtype is not None and _o_dtype == dtype: - return Index(result.to_pytimedelta(), dtype=_o_dtype) - else: - return result - if dtype is not None: try: diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 8f05a9a887830..3f8c07fe7cd21 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -3,8 +3,6 @@ Base and utility classes for tseries type pandas objects. 
""" import warnings -import operator -from datetime import datetime, timedelta from pandas import compat from pandas.compat.numpy import function as nv @@ -13,7 +11,6 @@ import numpy as np from pandas._libs import lib, iNaT, NaT -from pandas._libs.tslibs.period import Period from pandas._libs.tslibs.timestamps import round_ns from pandas.core.dtypes.common import ( @@ -24,32 +21,23 @@ is_list_like, is_scalar, is_bool_dtype, - is_offsetlike, is_categorical_dtype, is_datetime_or_timedelta_dtype, is_float_dtype, is_integer_dtype, is_object_dtype, - is_string_dtype, - is_datetime64_dtype, - is_datetime64tz_dtype, - is_datetime64_any_dtype, - is_period_dtype, - is_timedelta64_dtype) + is_string_dtype) from pandas.core.dtypes.generic import ( - ABCIndex, ABCSeries, ABCDataFrame, ABCPeriodIndex, ABCIndexClass) + ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass) from pandas.core.dtypes.missing import isna from pandas.core import common as com, algorithms, ops -from pandas.errors import NullFrequencyError import pandas.io.formats.printing as printing from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly import pandas.core.dtypes.concat as _concat -import pandas.tseries.frequencies as frequencies -from pandas.tseries.offsets import Tick, DateOffset import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -206,30 +194,6 @@ def floor(self, freq): def ceil(self, freq): return self._round(freq, np.ceil) - @classmethod - def _validate_frequency(cls, index, freq, **kwargs): - """ - Validate that a frequency is compatible with the values of a given - DatetimeIndex or TimedeltaIndex - - Parameters - ---------- - index : DatetimeIndex or TimedeltaIndex - The index on which to determine if the given frequency is valid - freq : DateOffset - The frequency to validate - """ - inferred = index.inferred_freq - if index.empty or inferred == freq.freqstr: - return None - - on_freq = cls._generate_range( - index[0], None, len(index), None, freq, **kwargs) - if not np.array_equal(index.asi8, on_freq.asi8): - msg = ('Inferred frequency {infer} from passed values does not ' - 'conform to passed frequency {passed}') - raise ValueError(msg.format(infer=inferred, passed=freq.freqstr)) - class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): """ common ops mixin to support a unified interface datetimelike Index """ @@ -584,56 +548,9 @@ def _add_datetimelike_methods(cls): """ def __add__(self, other): - other = lib.item_from_zerodim(other) - if isinstance(other, (ABCSeries, ABCDataFrame)): - return NotImplemented - - # scalar others - elif other is NaT: - result = self._add_nat() - elif isinstance(other, (Tick, timedelta, np.timedelta64)): - result = self._add_delta(other) - elif isinstance(other, DateOffset): - # specifically _not_ a Tick - result = self._add_offset(other) - elif isinstance(other, (datetime, np.datetime64)): - result = self._add_datelike(other) - elif is_integer(other): - # This check must come after the check for np.timedelta64 - # as is_integer returns True for these - result = self.shift(other) - - # array-like others - elif is_timedelta64_dtype(other): - # TimedeltaIndex, ndarray[timedelta64] - result = self._add_delta(other) - elif is_offsetlike(other): - # Array/Index of DateOffset objects - result = self._addsub_offset_array(other, operator.add) - elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): - # DatetimeIndex, 
ndarray[datetime64] - return self._add_datelike(other) - elif is_integer_dtype(other): - result = self._addsub_int_array(other, operator.add) - elif is_float_dtype(other) or is_period_dtype(other): - # Explicitly catch invalid dtypes - raise TypeError("cannot add {dtype}-dtype to {cls}" - .format(dtype=other.dtype, - cls=type(self).__name__)) - elif is_categorical_dtype(other): - # Categorical op will raise; defer explicitly - return NotImplemented - else: # pragma: no cover - return NotImplemented - - if result is NotImplemented: - return NotImplemented - elif not isinstance(result, Index): - # Index.__new__ will choose appropriate subclass for dtype - result = Index(result) - res_name = ops.get_op_result_name(self, other) - result.name = res_name - return result + # dispatch to ExtensionArray implementation + result = super(cls, self).__add__(other) + return wrap_arithmetic_op(self, other, result) cls.__add__ = __add__ @@ -643,95 +560,17 @@ def __radd__(self, other): cls.__radd__ = __radd__ def __sub__(self, other): - from pandas import Index - - other = lib.item_from_zerodim(other) - if isinstance(other, (ABCSeries, ABCDataFrame)): - return NotImplemented - - # scalar others - elif other is NaT: - result = self._sub_nat() - elif isinstance(other, (Tick, timedelta, np.timedelta64)): - result = self._add_delta(-other) - elif isinstance(other, DateOffset): - # specifically _not_ a Tick - result = self._add_offset(-other) - elif isinstance(other, (datetime, np.datetime64)): - result = self._sub_datelike(other) - elif is_integer(other): - # This check must come after the check for np.timedelta64 - # as is_integer returns True for these - result = self.shift(-other) - elif isinstance(other, Period): - result = self._sub_period(other) - - # array-like others - elif is_timedelta64_dtype(other): - # TimedeltaIndex, ndarray[timedelta64] - result = self._add_delta(-other) - elif is_offsetlike(other): - # Array/Index of DateOffset objects - result = self._addsub_offset_array(other, operator.sub) - elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other): - # DatetimeIndex, ndarray[datetime64] - result = self._sub_datelike(other) - elif is_period_dtype(other): - # PeriodIndex - result = self._sub_period_array(other) - elif is_integer_dtype(other): - result = self._addsub_int_array(other, operator.sub) - elif isinstance(other, Index): - raise TypeError("cannot subtract {cls} and {typ}" - .format(cls=type(self).__name__, - typ=type(other).__name__)) - elif is_float_dtype(other): - # Explicitly catch invalid dtypes - raise TypeError("cannot subtract {dtype}-dtype from {cls}" - .format(dtype=other.dtype, - cls=type(self).__name__)) - elif is_categorical_dtype(other): - # Categorical op will raise; defer explicitly - return NotImplemented - else: # pragma: no cover - return NotImplemented - - if result is NotImplemented: - return NotImplemented - elif not isinstance(result, Index): - # Index.__new__ will choose appropriate subclass for dtype - result = Index(result) - res_name = ops.get_op_result_name(self, other) - result.name = res_name - return result + # dispatch to ExtensionArray implementation + result = super(cls, self).__sub__(other) + return wrap_arithmetic_op(self, other, result) cls.__sub__ = __sub__ def __rsub__(self, other): - if is_datetime64_dtype(other) and is_timedelta64_dtype(self): - # ndarray[datetime64] cannot be subtracted from self, so - # we need to wrap in DatetimeIndex and flip the operation - from pandas import DatetimeIndex - return DatetimeIndex(other) - self - elif 
(is_datetime64_any_dtype(self) and hasattr(other, 'dtype') and - not is_datetime64_any_dtype(other)): - # GH#19959 datetime - datetime is well-defined as timedelta, - # but any other type - datetime is not well-defined. - raise TypeError("cannot subtract {cls} from {typ}" - .format(cls=type(self).__name__, - typ=type(other).__name__)) - return -(self - other) - cls.__rsub__ = __rsub__ + result = super(cls, self).__rsub__(other) + return wrap_arithmetic_op(self, other, result) - def __iadd__(self, other): - # alias for __add__ - return self.__add__(other) - cls.__iadd__ = __iadd__ - - def __isub__(self, other): - # alias for __sub__ - return self.__sub__(other) - cls.__isub__ = __isub__ + cls.__rsub__ = __rsub__ def isin(self, values): """ @@ -754,44 +593,6 @@ def isin(self, values): return algorithms.isin(self.asi8, values.asi8) - def shift(self, n, freq=None): - """ - Specialized shift which produces a DatetimeIndex - - Parameters - ---------- - n : int - Periods to shift by - freq : DateOffset or timedelta-like, optional - - Returns - ------- - shifted : DatetimeIndex - """ - if freq is not None and freq != self.freq: - if isinstance(freq, compat.string_types): - freq = frequencies.to_offset(freq) - offset = n * freq - result = self + offset - - if hasattr(self, 'tz'): - result._tz = self.tz - - return result - - if n == 0: - # immutable so OK - return self - - if self.freq is None: - raise NullFrequencyError("Cannot shift with no freq") - - start = self[0] + n * self.freq - end = self[-1] + n * self.freq - attribs = self._get_attributes_dict() - return self._generate_range(start=start, end=end, periods=None, - **attribs) - def repeat(self, repeats, *args, **kwargs): """ Analogous to ndarray.repeat @@ -896,3 +697,16 @@ def _ensure_datetimelike_to_i8(other): # period array cannot be coerces to int other = Index(other).asi8 return other + + +def wrap_arithmetic_op(self, other, result): + if result is NotImplemented: + return NotImplemented + + if not isinstance(result, Index): + # Index.__new__ will choose appropriate subclass for dtype + result = Index(result) + + res_name = ops.get_op_result_name(self, other) + result.name = res_name + return result diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 933e7406b5af3..3ee91a106f36b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -40,7 +40,7 @@ from pandas.core.indexes.datetimelike import ( DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin) from pandas.tseries.offsets import ( - generate_range, Tick, CDay, prefix_mapping) + generate_range, CDay, prefix_mapping) from pandas.core.tools.timedeltas import to_timedelta from pandas.util._decorators import ( @@ -49,7 +49,7 @@ import pandas.tseries.offsets as offsets import pandas.core.tools.datetimes as tools -from pandas._libs import (lib, index as libindex, tslibs, tslib as libts, +from pandas._libs import (lib, index as libindex, tslib as libts, join as libjoin, Timestamp) from pandas._libs.tslibs import (timezones, conversion, fields, parsing, ccalendar) @@ -98,9 +98,6 @@ def wrapper(self, other): return compat.set_function_name(wrapper, opname, cls) -_midnight = time(0, 0) - - def _new_DatetimeIndex(cls, d): """ This is called upon unpickling, rather than the default which doesn't have arguments and breaks __new__ """ @@ -323,7 +320,7 @@ def __new__(cls, data=None, data = tools.to_datetime(data, dayfirst=dayfirst, yearfirst=yearfirst) - if isinstance(data, DatetimeIndex): + if isinstance(data, DatetimeArrayMixin): if tz 
is None: tz = data.tz elif data.tz is None: @@ -375,135 +372,19 @@ def __new__(cls, data=None, return subarr._deepcopy_if_needed(ref_to_data, copy) @classmethod - def _generate_range(cls, start, end, periods, name, freq, tz=None, - normalize=False, ambiguous='raise', closed=None): - if com.count_not_none(start, end, periods, freq) != 3: - raise ValueError('Of the four parameters: start, end, periods, ' - 'and freq, exactly three must be specified') - - _normalized = True - - if start is not None: - start = Timestamp(start) - - if end is not None: - end = Timestamp(end) - - if start is None and end is None: - if closed is not None: - raise ValueError("Closed has to be None if not both of start" - "and end are defined") - - left_closed, right_closed = dtl.validate_endpoints(closed) - - try: - inferred_tz = timezones.infer_tzinfo(start, end) - except Exception: - raise TypeError('Start and end cannot both be tz-aware with ' - 'different timezones') - - inferred_tz = timezones.maybe_get_tz(inferred_tz) - tz = timezones.maybe_get_tz(tz) - - if tz is not None and inferred_tz is not None: - if not timezones.tz_compare(inferred_tz, tz): - raise AssertionError("Inferred time zone not equal to passed " - "time zone") - - elif inferred_tz is not None: - tz = inferred_tz - - if start is not None: - if normalize: - start = tslibs.normalize_date(start) - _normalized = True - else: - _normalized = _normalized and start.time() == _midnight - - if end is not None: - if normalize: - end = tslibs.normalize_date(end) - _normalized = True - else: - _normalized = _normalized and end.time() == _midnight - - if hasattr(freq, 'delta') and freq != offsets.Day(): - if inferred_tz is None and tz is not None: - # naive dates - if start is not None and start.tz is None: - start = start.tz_localize(tz, ambiguous=False) - - if end is not None and end.tz is None: - end = end.tz_localize(tz, ambiguous=False) - - if start and end: - if start.tz is None and end.tz is not None: - start = start.tz_localize(end.tz, ambiguous=False) + @Appender(DatetimeArrayMixin._generate_range.__doc__) + def _generate_range(cls, start, end, periods, name=None, freq=None, + tz=None, normalize=False, ambiguous='raise', + closed=None): + out = super(DatetimeIndex, cls)._generate_range( + start, end, periods, freq, + tz=tz, normalize=normalize, ambiguous=ambiguous, closed=closed) + out.name = name + return out - if end.tz is None and start.tz is not None: - end = end.tz_localize(start.tz, ambiguous=False) - - if _use_cached_range(freq, _normalized, start, end): - index = cls._cached_range(start, end, periods=periods, - freq=freq, name=name) - else: - index = _generate_regular_range(cls, start, end, periods, freq) - - else: - - if tz is not None: - # naive dates - if start is not None and start.tz is not None: - start = start.replace(tzinfo=None) - - if end is not None and end.tz is not None: - end = end.replace(tzinfo=None) - - if start and end: - if start.tz is None and end.tz is not None: - end = end.replace(tzinfo=None) - - if end.tz is None and start.tz is not None: - start = start.replace(tzinfo=None) - - if freq is not None: - if _use_cached_range(freq, _normalized, start, end): - index = cls._cached_range(start, end, periods=periods, - freq=freq, name=name) - else: - index = _generate_regular_range(cls, start, end, - periods, freq) - - if tz is not None and getattr(index, 'tz', None) is None: - arr = conversion.tz_localize_to_utc(ensure_int64(index), - tz, - ambiguous=ambiguous) - - index = cls(arr) - - # index is localized datetime64 array 
-> have to convert - # start/end as well to compare - if start is not None: - start = start.tz_localize(tz).asm8 - if end is not None: - end = end.tz_localize(tz).asm8 - else: - # Create a linearly spaced date_range in local time - start = start.tz_localize(tz) - end = end.tz_localize(tz) - index = tools.to_datetime(np.linspace(start.value, - end.value, periods), - utc=True) - index = index.tz_convert(tz) - - if not left_closed and len(index) and index[0] == start: - index = index[1:] - if not right_closed and len(index) and index[-1] == end: - index = index[:-1] - - index = cls._simple_new(index.values, name=name, freq=freq, tz=tz) - - return index + @classmethod + def _use_cached_range(cls, freq, _normalized, start, end): + return _use_cached_range(freq, _normalized, start, end) def _convert_for_op(self, value): """ Convert value to be insertable to ndarray """ @@ -1685,48 +1566,6 @@ def to_julian_date(self): DatetimeIndex._add_datetimelike_methods() -def _generate_regular_range(cls, start, end, periods, freq): - if isinstance(freq, Tick): - stride = freq.nanos - if periods is None: - b = Timestamp(start).value - # cannot just use e = Timestamp(end) + 1 because arange breaks when - # stride is too large, see GH10887 - e = (b + (Timestamp(end).value - b) // stride * stride + - stride // 2 + 1) - # end.tz == start.tz by this point due to _generate implementation - tz = start.tz - elif start is not None: - b = Timestamp(start).value - e = b + np.int64(periods) * stride - tz = start.tz - elif end is not None: - e = Timestamp(end).value + stride - b = e - np.int64(periods) * stride - tz = end.tz - else: - raise ValueError("at least 'start' or 'end' should be specified " - "if a 'period' is given.") - - data = np.arange(b, e, stride, dtype=np.int64) - data = cls._simple_new(data.view(_NS_DTYPE), None, tz=tz) - else: - if isinstance(start, Timestamp): - start = start.to_pydatetime() - - if isinstance(end, Timestamp): - end = end.to_pydatetime() - - xdr = generate_range(start=start, end=end, - periods=periods, offset=freq) - - dates = list(xdr) - # utc = len(dates) > 0 and dates[0].tzinfo is not None - data = tools.to_datetime(dates) - - return data - - def date_range(start=None, end=None, periods=None, freq=None, tz=None, normalize=False, name=None, closed=None, **kwargs): """ diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 006758f276f87..9f14d4cfd5863 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -14,7 +14,6 @@ pandas_dtype, ensure_int64) from pandas.core.dtypes.missing import isna -from pandas.core.dtypes.generic import ABCSeries from pandas.core.arrays.timedeltas import ( TimedeltaArrayMixin, _is_convertible_to_td, _to_m8) @@ -25,18 +24,17 @@ import pandas.compat as compat from pandas.tseries.frequencies import to_offset -from pandas.core.algorithms import checked_add_with_arr from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs import pandas.core.common as com import pandas.core.dtypes.concat as _concat from pandas.util._decorators import Appender, Substitution, deprecate_kwarg from pandas.core.indexes.datetimelike import ( - TimelikeOps, DatetimeIndexOpsMixin) + TimelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op) from pandas.core.tools.timedeltas import ( to_timedelta, _coerce_scalar_to_timedelta_type) from pandas._libs import (lib, index as libindex, - join as libjoin, Timedelta, NaT, iNaT) + join as libjoin, Timedelta, NaT) def _wrap_field_accessor(name): 
@@ -197,11 +195,10 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
         if unit is not None:
             data = to_timedelta(data, unit=unit, box=False)

-        if not isinstance(data, (np.ndarray, Index, ABCSeries)):
-            if is_scalar(data):
-                raise ValueError('TimedeltaIndex() must be called with a '
-                                 'collection of some kind, %s was passed'
-                                 % repr(data))
+        if is_scalar(data):
+            raise ValueError('TimedeltaIndex() must be called with a '
+                             'collection of some kind, {data} was passed'
+                             .format(data=repr(data)))

         # convert if not already
         if getattr(data, 'dtype', None) != _TD_DTYPE:
@@ -223,7 +220,8 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
         return subarr

     @classmethod
-    def _generate_range(cls, start, end, periods, name, freq, closed=None):
+    def _generate_range(cls, start, end, periods,
+                        name=None, freq=None, closed=None):
         # TimedeltaArray gets `name` via **kwargs, so we need to explicitly
         # override it if name is passed as a positional argument
         return super(TimedeltaIndex, cls)._generate_range(start, end,
@@ -262,37 +260,7 @@ def _maybe_update_attributes(self, attrs):
     def _evaluate_with_timedelta_like(self, other, op):
         result = TimedeltaArrayMixin._evaluate_with_timedelta_like(self, other,
                                                                    op)
-        if result is NotImplemented:
-            return NotImplemented
-        return Index(result, name=self.name, copy=False)
-
-    def _add_datelike(self, other):
-        # adding a timedeltaindex to a datetimelike
-        from pandas import Timestamp, DatetimeIndex
-        if isinstance(other, (DatetimeIndex, np.ndarray)):
-            # if other is an ndarray, we assume it is datetime64-dtype
-            # defer to implementation in DatetimeIndex
-            other = DatetimeIndex(other)
-            return other + self
-        else:
-            assert other is not NaT
-            other = Timestamp(other)
-            i8 = self.asi8
-            result = checked_add_with_arr(i8, other.value,
-                                          arr_mask=self._isnan)
-            result = self._maybe_mask_results(result, fill_value=iNaT)
-            return DatetimeIndex(result)
-
-    def _addsub_offset_array(self, other, op):
-        # Add or subtract Array-like of DateOffset objects
-        try:
-            # TimedeltaIndex can only operate with a subset of DateOffset
-            # subclasses.  Incompatible classes will raise AttributeError,
-            # which we re-raise as TypeError
-            return DatetimeIndexOpsMixin._addsub_offset_array(self, other, op)
-        except AttributeError:
-            raise TypeError("Cannot add/subtract non-tick DateOffset to {cls}"
-                            .format(cls=type(self).__name__))
+        return wrap_arithmetic_op(self, other, result)

     def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
         from pandas.io.formats.format import Timedelta64Formatter
diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
index 69e802fbaa3f0..24f34884dc077 100644
--- a/pandas/tests/arrays/test_datetimelike.py
+++ b/pandas/tests/arrays/test_datetimelike.py
@@ -16,6 +16,11 @@ def test_from_dti(self, tz_naive_fixture):
         arr = DatetimeArrayMixin(dti)
         assert list(dti) == list(arr)

+        # Check that Index.__new__ knows what to do with DatetimeArray
+        dti2 = pd.Index(arr)
+        assert isinstance(dti2, pd.DatetimeIndex)
+        assert list(dti2) == list(arr)
+
     def test_astype_object(self, tz_naive_fixture):
         tz = tz_naive_fixture
         dti = pd.date_range('2016-01-01', periods=3, tz=tz)
@@ -32,6 +37,11 @@ def test_from_tdi(self):
         arr = TimedeltaArrayMixin(tdi)
         assert list(arr) == list(tdi)

+        # Check that Index.__new__ knows what to do with TimedeltaArray
+        tdi2 = pd.Index(arr)
+        assert isinstance(tdi2, pd.TimedeltaIndex)
+        assert list(tdi2) == list(arr)
+
     def test_astype_object(self):
         tdi = pd.TimedeltaIndex(['1 Day', '3 Hours'])
         arr = TimedeltaArrayMixin(tdi)
@@ -48,6 +58,11 @@ def test_from_pi(self):
         arr = PeriodArrayMixin(pi)
         assert list(arr) == list(pi)

+        # Check that Index.__new__ knows what to do with PeriodArray
+        pi2 = pd.Index(arr)
+        assert isinstance(pi2, pd.PeriodIndex)
+        assert list(pi2) == list(arr)
+
     def test_astype_object(self):
         pi = pd.period_range('2016', freq='Q', periods=3)
         arr = PeriodArrayMixin(pi)
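
Usage sketch (not part of the patch): a minimal illustration of the behavior this diff targets, mirroring the new tests above. It assumes the internal import path already used elsewhere in the diff (pandas.core.arrays.DatetimeArrayMixin, private API at this stage of the refactor). The Index round-trip is what test_from_dti checks; the arithmetic calls now route through DatetimeLikeArrayMixin.__add__/__sub__, with wrap_arithmetic_op re-wrapping the result on the Index side.

    import pandas as pd
    from pandas.core.arrays import DatetimeArrayMixin  # internal API, may move

    dti = pd.date_range('2016-01-01', periods=3, freq='D')
    arr = DatetimeArrayMixin(dti)

    # Index.__new__ now dispatches datetime64 data (including the array
    # mixin) to DatetimeIndex before the ndarray/Index/Series branch.
    idx = pd.Index(arr)
    assert isinstance(idx, pd.DatetimeIndex)
    assert list(idx) == list(arr)

    # Index arithmetic is unchanged for users: the __add__/__sub__ installed
    # by _add_datetimelike_methods defer to the array mixin via super() and
    # wrap_arithmetic_op boxes the result into the appropriate Index subclass.
    shifted = idx + pd.Timedelta(days=1)
    assert isinstance(shifted, pd.DatetimeIndex)
    assert shifted[0] == pd.Timestamp('2016-01-02')

    spans = idx - pd.Timestamp('2016-01-01')
    assert isinstance(spans, pd.TimedeltaIndex)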