diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 74bf90eb94065..a9cf0301747b0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1,15 +1,19 @@ # -*- coding: utf-8 -*- +import operator import numpy as np from pandas._libs import lib, iNaT, NaT -from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds +from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta from pandas._libs.tslibs.period import ( DIFFERENT_FREQ_INDEX, IncompatibleFrequency) +from pandas.errors import NullFrequencyError + from pandas.tseries import frequencies +from pandas.tseries.offsets import Tick -from pandas.core.dtypes.common import is_period_dtype +from pandas.core.dtypes.common import is_period_dtype, is_timedelta64_dtype import pandas.core.common as com from pandas.core.algorithms import checked_add_with_arr @@ -130,6 +134,17 @@ def inferred_freq(self): except ValueError: return None + @property # NB: override with cache_readonly in immutable subclasses + def _resolution(self): + return frequencies.Resolution.get_reso_from_freq(self.freqstr) + + @property # NB: override with cache_readonly in immutable subclasses + def resolution(self): + """ + Returns day, hour, minute, second, millisecond or microsecond + """ + return frequencies.Resolution.get_str(self._resolution) + # ------------------------------------------------------------------ # Arithmetic Methods @@ -228,3 +243,43 @@ def _sub_period_array(self, other): mask = (self._isnan) | (other._isnan) new_values[mask] = NaT return new_values + + def _addsub_int_array(self, other, op): + """ + Add or subtract array-like of integers equivalent to applying + `shift` pointwise. 
+ + Parameters + ---------- + other : Index, ExtensionArray, np.ndarray + integer-dtype + op : {operator.add, operator.sub} + + Returns + ------- + result : same class as self + """ + assert op in [operator.add, operator.sub] + if is_period_dtype(self): + # easy case for PeriodIndex + if op is operator.sub: + other = -other + res_values = checked_add_with_arr(self.asi8, other, + arr_mask=self._isnan) + res_values = res_values.view('i8') + res_values[self._isnan] = iNaT + return self._from_ordinals(res_values, freq=self.freq) + + elif self.freq is None: + # GH#19123 + raise NullFrequencyError("Cannot shift with no freq") + + elif isinstance(self.freq, Tick): + # easy case where we can convert to timedelta64 operation + td = Timedelta(self.freq) + return op(self, td * other) + + # We should only get here with DatetimeIndex; dispatch + # to _addsub_offset_array + assert not is_timedelta64_dtype(self) + return op(self, np.array(other) * self.freq) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 34749f3631fca..cbdbc021cfd72 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -94,19 +94,28 @@ def _timezone(self): @property def offset(self): """get/set the frequency of the instance""" - msg = ('DatetimeIndex.offset has been deprecated and will be removed ' - 'in a future version; use DatetimeIndex.freq instead.') + msg = ('{cls}.offset has been deprecated and will be removed ' + 'in a future version; use {cls}.freq instead.' + .format(cls=type(self).__name__)) warnings.warn(msg, FutureWarning, stacklevel=2) return self.freq @offset.setter def offset(self, value): """get/set the frequency of the instance""" - msg = ('DatetimeIndex.offset has been deprecated and will be removed ' - 'in a future version; use DatetimeIndex.freq instead.') + msg = ('{cls}.offset has been deprecated and will be removed ' + 'in a future version; use {cls}.freq instead.' 
+ .format(cls=type(self).__name__)) warnings.warn(msg, FutureWarning, stacklevel=2) self.freq = value + @property # NB: override with cache_readonly in immutable subclasses + def is_normalized(self): + """ + Returns True if all of the dates are at midnight ("no time") + """ + return conversion.is_date_array_normalized(self.asi8, self.tz) + # ---------------------------------------------------------------- # Array-like Methods @@ -582,7 +591,7 @@ def date(self): def to_julian_date(self): """ - Convert DatetimeIndex to float64 ndarray of Julian Dates. + Convert Datetime Array to float64 ndarray of Julian Dates. 0 Julian date is noon January 1, 4713 BC. http://en.wikipedia.org/wiki/Julian_day """ diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 697c2ffc88050..08e894ce6c31c 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -5,15 +5,17 @@ import numpy as np from pandas._libs import lib -from pandas._libs.tslib import NaT +from pandas._libs.tslib import NaT, iNaT from pandas._libs.tslibs.period import ( Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX, get_period_field_arr) from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas._libs.tslibs.fields import isleapyear_arr +from pandas import compat from pandas.util._decorators import cache_readonly +from pandas.core.dtypes.common import is_integer_dtype, is_float_dtype from pandas.core.dtypes.dtypes import PeriodDtype from pandas.tseries import frequencies @@ -33,6 +35,47 @@ def f(self): return property(f) +def _period_array_cmp(opname, cls): + """ + Wrap comparison operations to convert Period-like to PeriodDtype + """ + nat_result = True if opname == '__ne__' else False + + def wrapper(self, other): + op = getattr(self._ndarray_values, opname) + if isinstance(other, Period): + if other.freq != self.freq: + msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) + raise IncompatibleFrequency(msg) + + result = op(other.ordinal) + elif 
isinstance(other, PeriodArrayMixin): + if other.freq != self.freq: + msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) + raise IncompatibleFrequency(msg) + + result = op(other._ndarray_values) + + mask = self._isnan | other._isnan + if mask.any(): + result[mask] = nat_result + + return result + elif other is NaT: + result = np.empty(len(self._ndarray_values), dtype=bool) + result.fill(nat_result) + else: + other = Period(other, freq=self.freq) + result = op(other.ordinal) + + if self.hasnans: + result[self._isnan] = nat_result + + return result + + return compat.set_function_name(wrapper, opname, cls) + + class PeriodArrayMixin(DatetimeLikeArrayMixin): @property def _box_func(self): @@ -59,12 +102,62 @@ def freq(self): @freq.setter def freq(self, value): msg = ('Setting {cls}.freq has been deprecated and will be ' - 'removed in a future version; use PeriodIndex.asfreq instead. ' + 'removed in a future version; use {cls}.asfreq instead. ' 'The {cls}.freq setter is not guaranteed to work.') warnings.warn(msg.format(cls=type(self).__name__), FutureWarning, stacklevel=2) self._freq = value + # -------------------------------------------------------------------- + # Constructors + + _attributes = ["freq"] + + def _get_attributes_dict(self): + """return an attributes dict for my class""" + return {k: getattr(self, k, None) for k in self._attributes} + + # TODO: share docstring? + def _shallow_copy(self, values=None, **kwargs): + if values is None: + values = self._ndarray_values + attributes = self._get_attributes_dict() + attributes.update(kwargs) + return self._simple_new(values, **attributes) + + @classmethod + def _simple_new(cls, values, freq=None): + """ + Values can be any type that can be coerced to Periods. 
+ Ordinals in an ndarray are fastpath-ed to `_from_ordinals` + """ + if not is_integer_dtype(values): + values = np.array(values, copy=False) + if len(values) > 0 and is_float_dtype(values): + raise TypeError("{cls} can't take floats" + .format(cls=cls.__name__)) + return cls(values, freq=freq) + + return cls._from_ordinals(values, freq) + + __new__ = _simple_new # For now... + + @classmethod + def _from_ordinals(cls, values, freq=None): + """ + Values should be int ordinals + `__new__` & `_simple_new` coerce to ordinals and call this method + """ + + values = np.array(values, dtype='int64', copy=False) + + result = object.__new__(cls) + result._data = values + if freq is None: + raise ValueError('freq is not specified and cannot be inferred') + result._freq = Period._maybe_convert_freq(freq) + return result + # -------------------------------------------------------------------- # Vectorized analogues of Period properties @@ -115,6 +208,52 @@ def _sub_period(self, other): return new_data + def _add_offset(self, other): + assert not isinstance(other, Tick) + base = frequencies.get_base_alias(other.rule_code) + if base != self.freq.rule_code: + msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) + raise IncompatibleFrequency(msg) + return self.shift(other.n) + + def _add_delta_td(self, other): + assert isinstance(other, (timedelta, np.timedelta64, Tick)) + nanos = delta_to_nanoseconds(other) + own_offset = frequencies.to_offset(self.freq.rule_code) + + if isinstance(own_offset, Tick): + offset_nanos = delta_to_nanoseconds(own_offset) + if np.all(nanos % offset_nanos == 0): + return self.shift(nanos // offset_nanos) + + # raise when input doesn't have freq + raise IncompatibleFrequency("Input has different freq from " + "{cls}(freq={freqstr})" + .format(cls=type(self).__name__, + freqstr=self.freqstr)) + + def _add_delta(self, other): + ordinal_delta = self._maybe_convert_timedelta(other) + return self.shift(ordinal_delta) + + def shift(self, n): + """ + 
Specialized shift which produces a Period Array/Index + + Parameters + ---------- + n : int + Periods to shift by + + Returns + ------- + shifted : Period Array/Index + """ + values = self._ndarray_values + n * self.freq.n + if self.hasnans: + values[self._isnan] = iNaT + return self._shallow_copy(values=values) + def _maybe_convert_timedelta(self, other): """ Convert timedelta-like input to an integer multiple of self.freq @@ -161,3 +300,16 @@ def _maybe_convert_timedelta(self, other): msg = "Input has different freq from {cls}(freq={freqstr})" raise IncompatibleFrequency(msg.format(cls=type(self).__name__, freqstr=self.freqstr)) + + @classmethod + def _add_comparison_methods(cls): + """ add in comparison methods """ + cls.__eq__ = _period_array_cmp('__eq__', cls) + cls.__ne__ = _period_array_cmp('__ne__', cls) + cls.__lt__ = _period_array_cmp('__lt__', cls) + cls.__gt__ = _period_array_cmp('__gt__', cls) + cls.__le__ = _period_array_cmp('__le__', cls) + cls.__ge__ = _period_array_cmp('__ge__', cls) + + +PeriodArrayMixin._add_comparison_methods() diff --git a/pandas/core/arrays/timedelta.py b/pandas/core/arrays/timedelta.py index b93cff51bbde9..4258679e1ddc5 100644 --- a/pandas/core/arrays/timedelta.py +++ b/pandas/core/arrays/timedelta.py @@ -98,7 +98,7 @@ def total_seconds(self): Returns ------- - seconds : ndarray, Float64Index, or Series + seconds : [ndarray, Float64Index, Series] When the calling object is a TimedeltaArray, the return type is ndarray. When the calling object is a TimedeltaIndex, the return type is a Float64Index. 
When the calling object diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index cc9b09654289d..5d0cc27979ee5 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -12,7 +12,7 @@ import numpy as np -from pandas._libs import lib, iNaT, NaT, Timedelta +from pandas._libs import lib, iNaT, NaT from pandas._libs.tslibs.period import Period from pandas._libs.tslibs.timestamps import round_ns @@ -40,7 +40,7 @@ ABCIndex, ABCSeries, ABCDataFrame, ABCPeriodIndex, ABCIndexClass) from pandas.core.dtypes.missing import isna from pandas.core import common as com, algorithms, ops -from pandas.core.algorithms import checked_add_with_arr + from pandas.errors import NullFrequencyError, PerformanceWarning import pandas.io.formats.printing as printing @@ -240,6 +240,8 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): inferred_freq = cache_readonly(DatetimeLikeArrayMixin.inferred_freq.fget) _isnan = cache_readonly(DatetimeLikeArrayMixin._isnan.fget) hasnans = cache_readonly(DatetimeLikeArrayMixin.hasnans.fget) + _resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget) + resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget) def equals(self, other): """ @@ -641,17 +643,6 @@ def _format_attrs(self): attrs.append(('freq', freq)) return attrs - @cache_readonly - def _resolution(self): - return frequencies.Resolution.get_reso_from_freq(self.freqstr) - - @cache_readonly - def resolution(self): - """ - Returns day, hour, minute, second, millisecond or microsecond - """ - return frequencies.Resolution.get_str(self._resolution) - def _convert_scalar_indexer(self, key, kind=None): """ we don't allow integer or float indexing on datetime-like when using @@ -717,46 +708,6 @@ def _addsub_offset_array(self, other, op): kwargs['freq'] = 'infer' return self._constructor(res_values, **kwargs) - def _addsub_int_array(self, other, op): - """ - Add or subtract array-like of integers equivalent to 
applying - `shift` pointwise. - - Parameters - ---------- - other : Index, np.ndarray - integer-dtype - op : {operator.add, operator.sub} - - Returns - ------- - result : same class as self - """ - assert op in [operator.add, operator.sub] - if is_period_dtype(self): - # easy case for PeriodIndex - if op is operator.sub: - other = -other - res_values = checked_add_with_arr(self.asi8, other, - arr_mask=self._isnan) - res_values = res_values.view('i8') - res_values[self._isnan] = iNaT - return self._from_ordinals(res_values, freq=self.freq) - - elif self.freq is None: - # GH#19123 - raise NullFrequencyError("Cannot shift with no freq") - - elif isinstance(self.freq, Tick): - # easy case where we can convert to timedelta64 operation - td = Timedelta(self.freq) - return op(self, td * other) - - # We should only get here with DatetimeIndex; dispatch - # to _addsub_offset_array - assert not is_timedelta64_dtype(self) - return op(self, np.array(other) * self.freq) - @classmethod def _add_datetimelike_methods(cls): """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1d6dc14593e3b..353a92e25d3cf 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -317,6 +317,7 @@ def _add_comparison_methods(cls): _is_numeric_dtype = False _infer_as_myclass = True _timezone = cache_readonly(DatetimeArrayMixin._timezone.fget) + is_normalized = cache_readonly(DatetimeArrayMixin.is_normalized.fget) def __new__(cls, data=None, freq=None, start=None, end=None, periods=None, tz=None, @@ -1706,13 +1707,6 @@ def inferred_type(self): def is_all_dates(self): return True - @cache_readonly - def is_normalized(self): - """ - Returns True if all of the dates are at midnight ("no time") - """ - return conversion.is_date_array_normalized(self.asi8, self.tz) - @cache_readonly def _resolution(self): return libresolution.resolution(self.asi8, self.tz) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 
892ef611a34f3..e6c2094defb2d 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -1,5 +1,5 @@ # pylint: disable=E1101,E1103,W0232 -from datetime import datetime, timedelta +from datetime import datetime import numpy as np import warnings @@ -20,7 +20,6 @@ import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import get_freq_code as _gfc -from pandas.tseries.offsets import Tick from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin @@ -32,7 +31,6 @@ DIFFERENT_FREQ_INDEX, _validate_end_alias, _quarter_to_myear) from pandas._libs.tslibs import resolution, period -from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas.core.arrays.period import PeriodArrayMixin from pandas.core.base import _shared_docs @@ -41,7 +39,7 @@ from pandas import compat from pandas.util._decorators import (Appender, Substitution, cache_readonly, deprecate_kwarg) -from pandas.compat import zip, u +from pandas.compat import zip import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -72,47 +70,6 @@ def dt64arr_to_periodarr(data, freq, tz): # --- Period index sketch -def _period_index_cmp(opname, cls): - """ - Wrap comparison operations to convert Period-like to PeriodDtype - """ - nat_result = True if opname == '__ne__' else False - - def wrapper(self, other): - op = getattr(self._ndarray_values, opname) - if isinstance(other, Period): - if other.freq != self.freq: - msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) - raise IncompatibleFrequency(msg) - - result = op(other.ordinal) - elif isinstance(other, PeriodIndex): - if other.freq != self.freq: - msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) - raise IncompatibleFrequency(msg) - - result = op(other._ndarray_values) - - mask = self._isnan | other._isnan - if mask.any(): - result[mask] = nat_result - - 
return result - elif other is tslib.NaT: - result = np.empty(len(self._ndarray_values), dtype=bool) - result.fill(nat_result) - else: - other = Period(other, freq=self.freq) - result = op(other.ordinal) - - if self.hasnans: - result[self._isnan] = nat_result - - return result - - return compat.set_function_name(wrapper, opname, cls) - - def _new_PeriodIndex(cls, **d): # GH13277 for unpickling if d['data'].dtype == 'int64': @@ -222,16 +179,6 @@ class PeriodIndex(PeriodArrayMixin, DatelikeOps, DatetimeIndexOpsMixin, _engine_type = libindex.PeriodEngine - @classmethod - def _add_comparison_methods(cls): - """ add in comparison methods """ - cls.__eq__ = _period_index_cmp('__eq__', cls) - cls.__ne__ = _period_index_cmp('__ne__', cls) - cls.__lt__ = _period_index_cmp('__lt__', cls) - cls.__gt__ = _period_index_cmp('__gt__', cls) - cls.__le__ = _period_index_cmp('__le__', cls) - cls.__ge__ = _period_index_cmp('__ge__', cls) - def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, periods=None, tz=None, dtype=None, copy=False, name=None, **fields): @@ -358,15 +305,9 @@ def _from_ordinals(cls, values, name=None, freq=None, **kwargs): Values should be int ordinals `__new__` & `_simple_new` cooerce to ordinals and call this method """ + result = super(PeriodIndex, cls)._from_ordinals(values, freq) - values = np.array(values, dtype='int64', copy=False) - - result = object.__new__(cls) - result._data = values result.name = name - if freq is None: - raise ValueError('freq is not specified and cannot be inferred') - result._freq = Period._maybe_convert_freq(freq) result._reset_identity() return result @@ -374,14 +315,6 @@ def _shallow_copy_with_infer(self, values=None, **kwargs): """ we always want to return a PeriodIndex """ return self._shallow_copy(values=values, **kwargs) - def _shallow_copy(self, values=None, freq=None, **kwargs): - if freq is None: - freq = self.freq - if values is None: - values = self._ndarray_values - return super(PeriodIndex, 
self)._shallow_copy(values=values, - freq=freq, **kwargs) - def _coerce_scalar_to_index(self, item): """ we need to coerce a scalar to a compat for our index type @@ -565,7 +498,6 @@ def asfreq(self, freq=None, how='E'): Returns ------- - new : PeriodIndex with the new frequency Examples @@ -665,34 +597,6 @@ def to_timestamp(self, freq=None, how='start'): new_data = period.periodarr_to_dt64arr(new_data._ndarray_values, base) return DatetimeIndex(new_data, freq='infer', name=self.name) - def _add_offset(self, other): - assert not isinstance(other, Tick) - base = frequencies.get_base_alias(other.rule_code) - if base != self.freq.rule_code: - msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) - raise IncompatibleFrequency(msg) - return self.shift(other.n) - - def _add_delta_td(self, other): - assert isinstance(other, (timedelta, np.timedelta64, Tick)) - nanos = delta_to_nanoseconds(other) - own_offset = frequencies.to_offset(self.freq.rule_code) - - if isinstance(own_offset, Tick): - offset_nanos = delta_to_nanoseconds(own_offset) - if np.all(nanos % offset_nanos == 0): - return self.shift(nanos // offset_nanos) - - # raise when input doesn't have freq - raise IncompatibleFrequency("Input has different freq from " - "{cls}(freq={freqstr})" - .format(cls=type(self).__name__, - freqstr=self.freqstr)) - - def _add_delta(self, other): - ordinal_delta = self._maybe_convert_timedelta(other) - return self.shift(ordinal_delta) - def _sub_period(self, other): # If the operation is well-defined, we return an object-Index # of DateOffsets. Null entries are filled with pd.NaT @@ -701,24 +605,6 @@ def _sub_period(self, other): # TODO: Should name=self.name be passed here? 
return Index(new_data) - def shift(self, n): - """ - Specialized shift which produces an PeriodIndex - - Parameters - ---------- - n : int - Periods to shift by - - Returns - ------- - shifted : PeriodIndex - """ - values = self._ndarray_values + n * self.freq.n - if self.hasnans: - values[self._isnan] = tslib.iNaT - return self._shallow_copy(values=values) - @property def inferred_type(self): # b/c data is represented as ints make sure we can't have ambiguous @@ -961,15 +847,14 @@ def _apply_meta(self, rawarr): name=self.name) return rawarr - def _format_native_types(self, na_rep=u('NaT'), date_format=None, - **kwargs): + def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): values = self.astype(object).values if date_format: formatter = lambda dt: dt.strftime(date_format) else: - formatter = lambda dt: u('%s') % dt + formatter = lambda dt: u'%s' % dt if self.hasnans: mask = self._isnan diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index e9b9fb63811d5..25f72d38eeb9b 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -21,7 +21,7 @@ from pandas.core.indexes.base import Index from pandas.core.indexes.numeric import Int64Index import pandas.compat as compat -from pandas.compat import u + from pandas.tseries.frequencies import to_offset from pandas.core.algorithms import checked_add_with_arr from pandas.core.base import _shared_docs @@ -413,8 +413,7 @@ def _addsub_offset_array(self, other, op): raise TypeError("Cannot add/subtract non-tick DateOffset to {cls}" .format(cls=type(self).__name__)) - def _format_native_types(self, na_rep=u('NaT'), - date_format=None, **kwargs): + def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): from pandas.io.formats.format import Timedelta64Formatter return Timedelta64Formatter(values=self, nat_rep=na_rep,