From 8b9358611e0cf47b231bdd0f6cbeeef4c73302eb Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 8 Jul 2018 11:48:45 -0500 Subject: [PATCH] Move most remaining arith helpers --- pandas/core/arrays/datetimelike.py | 92 +++++++++++++++++------- pandas/core/arrays/datetimes.py | 73 +++++++++++++++++++ pandas/core/arrays/period.py | 78 ++++++++++++++++++-- pandas/core/arrays/timedelta.py | 41 ++++++++++- pandas/core/indexes/datetimelike.py | 30 +------- pandas/core/indexes/datetimes.py | 58 +-------------- pandas/core/indexes/period.py | 60 ---------------- pandas/core/indexes/timedeltas.py | 32 --------- pandas/tests/arrays/test_datetimelike.py | 57 +++++++++++++++ 9 files changed, 310 insertions(+), 211 deletions(-) create mode 100644 pandas/tests/arrays/test_datetimelike.py diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 118516192e4ed..65f34b847f8d0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- import operator +import warnings import numpy as np @@ -8,12 +9,16 @@ from pandas._libs.tslibs.period import ( DIFFERENT_FREQ_INDEX, IncompatibleFrequency) -from pandas.errors import NullFrequencyError +from pandas.errors import NullFrequencyError, PerformanceWarning from pandas.tseries import frequencies from pandas.tseries.offsets import Tick -from pandas.core.dtypes.common import is_period_dtype, is_timedelta64_dtype +from pandas.core.dtypes.common import ( + is_period_dtype, + is_timedelta64_dtype, + is_object_dtype) + import pandas.core.common as com from pandas.core.algorithms import checked_add_with_arr @@ -108,38 +113,43 @@ def __getitem__(self, key): if is_int: val = getitem(key) return self._box_func(val) + + if com.is_bool_indexer(key): + key = np.asarray(key) + if key.all(): + key = slice(0, None, None) + else: + key = lib.maybe_booleans_to_slice(key.view(np.uint8)) + + attribs = self._get_attributes_dict() + + is_period = 
is_period_dtype(self) + if is_period: + freq = self.freq else: - if com.is_bool_indexer(key): - key = np.asarray(key) - if key.all(): - key = slice(0, None, None) + freq = None + if isinstance(key, slice): + if self.freq is not None and key.step is not None: + freq = key.step * self.freq else: - key = lib.maybe_booleans_to_slice(key.view(np.uint8)) + freq = self.freq - attribs = self._get_attributes_dict() + attribs['freq'] = freq - is_period = is_period_dtype(self) + result = getitem(key) + if result.ndim > 1: + # To support MPL which performs slicing with 2 dim + # even though it only has 1 dim by definition if is_period: - freq = self.freq - else: - freq = None - if isinstance(key, slice): - if self.freq is not None and key.step is not None: - freq = key.step * self.freq - else: - freq = self.freq - - attribs['freq'] = freq + return self._simple_new(result, **attribs) + return result - result = getitem(key) - if result.ndim > 1: - # To support MPL which performs slicing with 2 dim - # even though it only has 1 dim by definition - if is_period: - return self._simple_new(result, **attribs) - return result + return self._simple_new(result, **attribs) - return self._simple_new(result, **attribs) + def astype(self, dtype, copy=True): + if is_object_dtype(dtype): + return self._box_values(self.asi8) + return super(DatetimeLikeArrayMixin, self).astype(dtype, copy) # ------------------------------------------------------------------ # Null Handling @@ -397,3 +407,31 @@ def _addsub_int_array(self, other, op): # to _addsub_offset_array assert not is_timedelta64_dtype(self) return op(self, np.array(other) * self.freq) + + def _addsub_offset_array(self, other, op): + """ + Add or subtract array-like of DateOffset objects + + Parameters + ---------- + other : Index, np.ndarray + object-dtype containing pd.DateOffset objects + op : {operator.add, operator.sub} + + Returns + ------- + result : same class as self + """ + assert op in [operator.add, operator.sub] + if len(other) 
== 1: + return op(self, other[0]) + + warnings.warn("Adding/subtracting array of DateOffsets to " + "{cls} not vectorized" + .format(cls=type(self).__name__), PerformanceWarning) + + res_values = op(self.astype('O').values, np.array(other)) + kwargs = {} + if not is_period_dtype(self): + kwargs['freq'] = 'infer' + return type(self)(res_values, **kwargs) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e6e0544bfa22b..05bc3d23cfb8e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from datetime import timedelta import warnings import numpy as np @@ -11,15 +12,18 @@ resolution as libresolution) from pandas.util._decorators import cache_readonly +from pandas.errors import PerformanceWarning from pandas.core.dtypes.common import ( _NS_DTYPE, is_datetime64tz_dtype, is_datetime64_dtype, + is_timedelta64_dtype, _ensure_int64) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.tseries.frequencies import to_offset, DateOffset +from pandas.tseries.offsets import Tick from .datetimelike import DatetimeLikeArrayMixin @@ -104,6 +108,10 @@ def _simple_new(cls, values, freq=None, tz=None, **kwargs): return result def __new__(cls, values, freq=None, tz=None): + if tz is None and hasattr(values, 'tz'): + # e.g. DatetimeIndex + tz = values.tz + if (freq is not None and not isinstance(freq, DateOffset) and freq != 'infer'): freq = to_offset(freq) @@ -131,6 +139,17 @@ def dtype(self): return _NS_DTYPE return DatetimeTZDtype('ns', self.tz) + @property + def tz(self): + # GH 18595 + return self._tz + + @tz.setter + def tz(self, value): + # GH 3746: Prevent localizing or converting the index by setting tz + raise AttributeError("Cannot directly set timezone. 
Use tz_localize() " + "or tz_convert() as appropriate") + @property def tzinfo(self): """ @@ -244,6 +263,60 @@ def _sub_datelike_dti(self, other): new_values[mask] = iNaT return new_values.view('timedelta64[ns]') + def _add_offset(self, offset): + assert not isinstance(offset, Tick) + try: + if self.tz is not None: + values = self.tz_localize(None) + else: + values = self + result = offset.apply_index(values) + if self.tz is not None: + result = result.tz_localize(self.tz) + + except NotImplementedError: + warnings.warn("Non-vectorized DateOffset being applied to Series " + "or DatetimeIndex", PerformanceWarning) + result = self.astype('O') + offset + + return type(self)(result, freq='infer') + + def _add_delta(self, delta): + """ + Add a timedelta-like, DateOffset, or TimedeltaIndex-like object + to self. + + Parameters + ---------- + delta : {timedelta, np.timedelta64, DateOffset, + TimedeltaIndex, ndarray[timedelta64]} + + Returns + ------- + result : same type as self + + Notes + ----- + The result's name is set outside of _add_delta by the calling + method (__add__ or __sub__) + """ + from pandas.core.arrays.timedelta import TimedeltaArrayMixin + + if isinstance(delta, (Tick, timedelta, np.timedelta64)): + new_values = self._add_delta_td(delta) + elif is_timedelta64_dtype(delta): + if not isinstance(delta, TimedeltaArrayMixin): + delta = TimedeltaArrayMixin(delta) + new_values = self._add_delta_tdi(delta) + else: + new_values = self.astype('O') + delta + + tz = 'UTC' if self.tz is not None else None + result = type(self)(new_values, tz=tz, freq='infer') + if self.tz is not None and self.tz is not utc: + result = result.tz_convert(self.tz) + return result + # ----------------------------------------------------------------- # Timezone Conversion and Localization Methods diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index fa416e30493e9..35baa3262d3dd 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -8,14 
+8,16 @@ from pandas._libs.tslib import NaT, iNaT from pandas._libs.tslibs.period import ( Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX, - get_period_field_arr) + get_period_field_arr, period_asfreq_arr) +from pandas._libs.tslibs import period as libperiod from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas._libs.tslibs.fields import isleapyear_arr from pandas import compat from pandas.util._decorators import cache_readonly -from pandas.core.dtypes.common import is_integer_dtype, is_float_dtype +from pandas.core.dtypes.common import ( + is_integer_dtype, is_float_dtype, is_period_dtype) from pandas.core.dtypes.dtypes import PeriodDtype from pandas.tseries import frequencies @@ -113,12 +115,23 @@ def freq(self, value): _attributes = ["freq"] + def __new__(cls, values, freq=None, **kwargs): + if is_period_dtype(values): + # PeriodArray, PeriodIndex + if freq is not None and values.freq != freq: + raise IncompatibleFrequency(freq, values.freq) + freq = values.freq + values = values.asi8 + + return cls._simple_new(values, freq, **kwargs) + @classmethod def _simple_new(cls, values, freq=None, **kwargs): """ Values can be any type that can be coerced to Periods. Ordinals in an ndarray are fastpath-ed to `_from_ordinals` """ + if not is_integer_dtype(values): values = np.array(values, copy=False) if len(values) > 0 and is_float_dtype(values): @@ -128,8 +141,6 @@ def _simple_new(cls, values, freq=None, **kwargs): return cls._from_ordinals(values, freq) - __new__ = _simple_new # For now... - @classmethod def _from_ordinals(cls, values, freq=None): """ @@ -173,6 +184,65 @@ def is_leap_year(self): """ Logical indicating if the date belongs to a leap year """ return isleapyear_arr(np.asarray(self.year)) + def asfreq(self, freq=None, how='E'): + """ + Convert the Period Array/Index to the specified frequency `freq`. 
+ + Parameters + ---------- + freq : str + a frequency + how : str {'E', 'S'} + 'E', 'END', or 'FINISH' for end, + 'S', 'START', or 'BEGIN' for start. + Whether the elements should be aligned to the end + or start within a period. January 31st ('END') vs. + January 1st ('START') for example. + + Returns + ------- + new : Period Array/Index with the new frequency + + Examples + -------- + >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='A') + >>> pidx + + [2010, ..., 2015] + Length: 6, Freq: A-DEC + + >>> pidx.asfreq('M') + + [2010-12, ..., 2015-12] + Length: 6, Freq: M + + >>> pidx.asfreq('M', how='S') + + [2010-01, ..., 2015-01] + Length: 6, Freq: M + """ + how = libperiod._validate_end_alias(how) + + freq = Period._maybe_convert_freq(freq) + + base1, mult1 = frequencies.get_freq_code(self.freq) + base2, mult2 = frequencies.get_freq_code(freq) + + asi8 = self.asi8 + # mult1 can't be negative or 0 + end = how == 'E' + if end: + ordinal = asi8 + mult1 - 1 + else: + ordinal = asi8 + + new_data = period_asfreq_arr(ordinal, base1, base2, end) + + if self.hasnans: + new_data[self._isnan] = iNaT + + return self._simple_new(new_data, self.name, freq=freq) + # ------------------------------------------------------------------ # Arithmetic Methods diff --git a/pandas/core/arrays/timedelta.py b/pandas/core/arrays/timedelta.py index af0fa2c3477f4..20a9f263bf5e1 100644 --- a/pandas/core/arrays/timedelta.py +++ b/pandas/core/arrays/timedelta.py @@ -10,7 +10,8 @@ from pandas import compat -from pandas.core.dtypes.common import _TD_DTYPE, _ensure_int64 +from pandas.core.dtypes.common import ( + _TD_DTYPE, _ensure_int64, is_timedelta64_dtype) from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna @@ -58,7 +59,11 @@ def _simple_new(cls, values, freq=None, **kwargs): if values.dtype == np.object_: values = array_to_timedelta64(values) if values.dtype != _TD_DTYPE: - values = _ensure_int64(values).view(_TD_DTYPE) + if 
is_timedelta64_dtype(values): + # non-nano unit + values = values.astype(_TD_DTYPE) + else: + values = _ensure_int64(values).view(_TD_DTYPE) result = object.__new__(cls) result._data = values @@ -92,6 +97,38 @@ def _sub_datelike(self, other): raise TypeError("cannot subtract a datelike from a {cls}" .format(cls=type(self).__name__)) + def _add_delta(self, delta): + """ + Add a timedelta-like, Tick, or TimedeltaIndex-like object + to self. + + Parameters + ---------- + delta : timedelta, np.timedelta64, Tick, TimedeltaArray, TimedeltaIndex + + Returns + ------- + result : same type as self + + Notes + ----- + The result's name is set outside of _add_delta by the calling + method (__add__ or __sub__) + """ + if isinstance(delta, (Tick, timedelta, np.timedelta64)): + new_values = self._add_delta_td(delta) + elif isinstance(delta, TimedeltaArrayMixin): + new_values = self._add_delta_tdi(delta) + elif is_timedelta64_dtype(delta): + # ndarray[timedelta64] --> wrap in TimedeltaArray/Index + delta = type(self)(delta) + new_values = self._add_delta_tdi(delta) + else: + raise TypeError("cannot add the type {0} to a TimedeltaIndex" + .format(type(delta))) + + return type(self)(new_values, freq='infer') + def _evaluate_with_timedelta_like(self, other, op): if isinstance(other, ABCSeries): # GH#19042 diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index daaa40addf6c0..37e20496aafce 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -41,7 +41,7 @@ from pandas.core.dtypes.missing import isna from pandas.core import common as com, algorithms, ops -from pandas.errors import NullFrequencyError, PerformanceWarning +from pandas.errors import NullFrequencyError import pandas.io.formats.printing as printing from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin @@ -599,34 +599,6 @@ def _convert_scalar_indexer(self, key, kind=None): return (super(DatetimeIndexOpsMixin, self) 
._convert_scalar_indexer(key, kind=kind)) - def _addsub_offset_array(self, other, op): - """ - Add or subtract array-like of DateOffset objects - - Parameters - ---------- - other : Index, np.ndarray - object-dtype containing pd.DateOffset objects - op : {operator.add, operator.sub} - - Returns - ------- - result : same class as self - """ - assert op in [operator.add, operator.sub] - if len(other) == 1: - return op(self, other[0]) - - warnings.warn("Adding/subtracting array of DateOffsets to " - "{cls} not vectorized" - .format(cls=type(self).__name__), PerformanceWarning) - - res_values = op(self.astype('O').values, np.array(other)) - kwargs = {} - if not is_period_dtype(self): - kwargs['freq'] = 'infer' - return type(self)(res_values, **kwargs) - @classmethod def _add_datetimelike_methods(cls): """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 53b1186b37a90..13344a77d8cc6 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2,7 +2,7 @@ from __future__ import division import operator import warnings -from datetime import time, datetime, timedelta +from datetime import time, datetime import numpy as np from pytz import utc @@ -16,7 +16,6 @@ is_datetime64_dtype, is_datetimetz, is_dtype_equal, - is_timedelta64_dtype, is_integer, is_float, is_integer_dtype, @@ -33,7 +32,6 @@ from pandas.core.dtypes.missing import isna import pandas.core.dtypes.concat as _concat -from pandas.errors import PerformanceWarning from pandas.core.algorithms import checked_add_with_arr from pandas.core.arrays.datetimes import DatetimeArrayMixin @@ -824,60 +822,6 @@ def _maybe_update_attributes(self, attrs): attrs['freq'] = 'infer' return attrs - def _add_delta(self, delta): - """ - Add a timedelta-like, DateOffset, or TimedeltaIndex-like object - to self. 
- - Parameters - ---------- - delta : {timedelta, np.timedelta64, DateOffset, - TimedelaIndex, ndarray[timedelta64]} - - Returns - ------- - result : DatetimeIndex - - Notes - ----- - The result's name is set outside of _add_delta by the calling - method (__add__ or __sub__) - """ - from pandas import TimedeltaIndex - - if isinstance(delta, (Tick, timedelta, np.timedelta64)): - new_values = self._add_delta_td(delta) - elif is_timedelta64_dtype(delta): - if not isinstance(delta, TimedeltaIndex): - delta = TimedeltaIndex(delta) - new_values = self._add_delta_tdi(delta) - else: - new_values = self.astype('O') + delta - - tz = 'UTC' if self.tz is not None else None - result = DatetimeIndex(new_values, tz=tz, freq='infer') - if self.tz is not None and self.tz is not utc: - result = result.tz_convert(self.tz) - return result - - def _add_offset(self, offset): - assert not isinstance(offset, Tick) - try: - if self.tz is not None: - values = self.tz_localize(None) - else: - values = self - result = offset.apply_index(values) - if self.tz is not None: - result = result.tz_localize(self.tz) - - except NotImplementedError: - warnings.warn("Non-vectorized DateOffset being applied to Series " - "or DatetimeIndex", PerformanceWarning) - result = self.astype('O') + offset - - return DatetimeIndex(result, freq='infer') - def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): from pandas.io.formats.format import _get_format_datetime64_from_values format = _get_format_datetime64_from_values(self, date_format) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 3fa525b8617c5..4ded4527e0a36 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -480,66 +480,6 @@ def is_full(self): values = self.asi8 return ((values[1:] - values[:-1]) < 2).all() - def asfreq(self, freq=None, how='E'): - """ - Convert the PeriodIndex to the specified frequency `freq`. 
- - Parameters - ---------- - - freq : str - a frequency - how : str {'E', 'S'} - 'E', 'END', or 'FINISH' for end, - 'S', 'START', or 'BEGIN' for start. - Whether the elements should be aligned to the end - or start within pa period. January 31st ('END') vs. - January 1st ('START') for example. - - Returns - ------- - new : PeriodIndex with the new frequency - - Examples - -------- - >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='A') - >>> pidx - - [2010, ..., 2015] - Length: 6, Freq: A-DEC - - >>> pidx.asfreq('M') - - [2010-12, ..., 2015-12] - Length: 6, Freq: M - - >>> pidx.asfreq('M', how='S') - - [2010-01, ..., 2015-01] - Length: 6, Freq: M - """ - how = _validate_end_alias(how) - - freq = Period._maybe_convert_freq(freq) - - base1, mult1 = _gfc(self.freq) - base2, mult2 = _gfc(freq) - - asi8 = self.asi8 - # mult1 can't be negative or 0 - end = how == 'E' - if end: - ordinal = asi8 + mult1 - 1 - else: - ordinal = asi8 - - new_data = period.period_asfreq_arr(ordinal, base1, base2, end) - - if self.hasnans: - new_data[self._isnan] = tslib.iNaT - - return self._simple_new(new_data, self.name, freq=freq) - year = _wrap_field_accessor('year') month = _wrap_field_accessor('month') day = _wrap_field_accessor('day') diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index b1dc6e6286b98..3af825455caac 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -337,38 +337,6 @@ def _maybe_update_attributes(self, attrs): attrs['freq'] = 'infer' return attrs - def _add_delta(self, delta): - """ - Add a timedelta-like, Tick, or TimedeltaIndex-like object - to self. 
- - Parameters - ---------- - delta : {timedelta, np.timedelta64, Tick, TimedeltaIndex} - - Returns - ------- - result : TimedeltaIndex - - Notes - ----- - The result's name is set outside of _add_delta by the calling - method (__add__ or __sub__) - """ - if isinstance(delta, (Tick, timedelta, np.timedelta64)): - new_values = self._add_delta_td(delta) - elif isinstance(delta, TimedeltaIndex): - new_values = self._add_delta_tdi(delta) - elif is_timedelta64_dtype(delta): - # ndarray[timedelta64] --> wrap in TimedeltaIndex - delta = TimedeltaIndex(delta) - new_values = self._add_delta_tdi(delta) - else: - raise TypeError("cannot add the type {0} to a TimedeltaIndex" - .format(type(delta))) - - return TimedeltaIndex(new_values, freq='infer') - def _evaluate_with_timedelta_like(self, other, op): result = TimedeltaArrayMixin._evaluate_with_timedelta_like(self, other, op) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py new file mode 100644 index 0000000000000..d116b3bcff86a --- /dev/null +++ b/pandas/tests/arrays/test_datetimelike.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +import numpy as np + +import pandas as pd + +from pandas.core.arrays.datetimes import DatetimeArrayMixin +from pandas.core.arrays.timedelta import TimedeltaArrayMixin +from pandas.core.arrays.period import PeriodArrayMixin + + +class TestDatetimeArray(object): + + def test_from_dti(self, tz_naive_fixture): + tz = tz_naive_fixture + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + arr = DatetimeArrayMixin(dti) + assert list(dti) == list(arr) + + def test_astype_object(self, tz_naive_fixture): + tz = tz_naive_fixture + dti = pd.date_range('2016-01-01', periods=3, tz=tz) + arr = DatetimeArrayMixin(dti) + asobj = arr.astype('O') + assert isinstance(asobj, np.ndarray) + assert asobj.dtype == 'O' + assert list(asobj) == list(dti) + + +class TestTimedeltaArray(object): + def test_from_tdi(self): + tdi = pd.TimedeltaIndex(['1 Day', '3 Hours']) + arr = 
TimedeltaArrayMixin(tdi) + assert list(arr) == list(tdi) + + def test_astype_object(self): + tdi = pd.TimedeltaIndex(['1 Day', '3 Hours']) + arr = TimedeltaArrayMixin(tdi) + asobj = arr.astype('O') + assert isinstance(asobj, np.ndarray) + assert asobj.dtype == 'O' + assert list(asobj) == list(tdi) + + +class TestPeriodArray(object): + + def test_from_pi(self): + pi = pd.period_range('2016', freq='Q', periods=3) + arr = PeriodArrayMixin(pi) + assert list(arr) == list(pi) + + def test_astype_object(self): + pi = pd.period_range('2016', freq='Q', periods=3) + arr = PeriodArrayMixin(pi) + asobj = arr.astype('O') + assert isinstance(asobj, np.ndarray) + assert asobj.dtype == 'O' + assert list(asobj) == list(pi)