diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a0a9b57712249..7daaa8de1734f 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -31,7 +31,7 @@ from pandas.core.algorithms import checked_add_with_arr from pandas.core import ops -from pandas.tseries.frequencies import to_offset +from pandas.tseries.frequencies import to_offset, get_period_alias from pandas.tseries.offsets import Tick, generate_range from pandas.core.arrays import datetimelike as dtl @@ -200,6 +200,10 @@ def __new__(cls, values, freq=None, tz=None, dtype=None): # e.g. DatetimeIndex tz = values.tz + if freq is None and hasattr(values, "freq"): + # i.e. DatetimeArray, DatetimeIndex + freq = values.freq + freq, freq_infer = dtl.maybe_infer_freq(freq) # if dtype has an embedded tz, capture it @@ -764,6 +768,67 @@ def normalize(self): new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz) return type(self)(new_values, freq='infer').tz_localize(self.tz) + def to_period(self, freq=None): + """ + Cast to PeriodArray/Index at a particular frequency. + + Converts DatetimeArray/Index to PeriodArray/Index. + + Parameters + ---------- + freq : string or Offset, optional + One of pandas' :ref:`offset strings <timeseries.offset_aliases>` + or an Offset object. Will be inferred by default. + + Returns + ------- + PeriodArray/Index + + Raises + ------ + ValueError + When converting a DatetimeArray/Index with non-regular values, + so that a frequency cannot be inferred. + + Examples + -------- + >>> df = pd.DataFrame({"y": [1,2,3]}, + ... index=pd.to_datetime(["2000-03-31 00:00:00", + ... "2000-05-31 00:00:00", + ... 
"2000-08-31 00:00:00"])) + >>> df.index.to_period("M") + PeriodIndex(['2000-03', '2000-05', '2000-08'], + dtype='period[M]', freq='M') + + Infer the daily frequency + + >>> idx = pd.date_range("2017-01-01", periods=2) + >>> idx.to_period() + PeriodIndex(['2017-01-01', '2017-01-02'], + dtype='period[D]', freq='D') + + See also + -------- + pandas.PeriodIndex: Immutable ndarray holding ordinal values + pandas.DatetimeIndex.to_pydatetime: Return DatetimeIndex as object + """ + from pandas.core.arrays.period import PeriodArrayMixin + + if self.tz is not None: + warnings.warn("Converting to PeriodArray/Index representation " + "will drop timezone information.", UserWarning) + + if freq is None: + freq = self.freqstr or self.inferred_freq + + if freq is None: + raise ValueError("You must pass a freq argument as " + "current index has none.") + + freq = get_period_alias(freq) + + return PeriodArrayMixin(self.values, freq=freq) + # ----------------------------------------------------------------- # Properties - Vectorized Timestamp Properties/Methods diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 41b4c5c669efc..9e877de1a3c0a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -10,14 +10,15 @@ Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX, get_period_field_arr, period_asfreq_arr) from pandas._libs.tslibs import period as libperiod -from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds +from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta from pandas._libs.tslibs.fields import isleapyear_arr from pandas import compat from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( - is_integer_dtype, is_float_dtype, is_period_dtype) + is_integer_dtype, is_float_dtype, is_period_dtype, + is_datetime64_dtype) from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ABCSeries @@ -127,6 +128,10 @@ def __new__(cls, values, 
freq=None, **kwargs): freq = values.freq values = values.asi8 + elif is_datetime64_dtype(values): + # TODO: what if it has tz? + values = dt64arr_to_periodarr(values, freq) + return cls._simple_new(values, freq, **kwargs) @classmethod @@ -207,6 +212,14 @@ def is_leap_year(self): """ Logical indicating if the date belongs to a leap year """ return isleapyear_arr(np.asarray(self.year)) + @property + def start_time(self): + return self.to_timestamp(how='start') + + @property + def end_time(self): + return self.to_timestamp(how='end') + def asfreq(self, freq=None, how='E'): """ Convert the Period Array/Index to the specified frequency `freq`. @@ -266,6 +279,48 @@ def asfreq(self, freq=None, how='E'): return self._shallow_copy(new_data, freq=freq) + def to_timestamp(self, freq=None, how='start'): + """ + Cast to DatetimeArray/Index + + Parameters + ---------- + freq : string or DateOffset, optional + Target frequency. The default is 'D' for week or longer, + 'S' otherwise + how : {'s', 'e', 'start', 'end'} + + Returns + ------- + DatetimeArray/Index + """ + from pandas.core.arrays.datetimes import DatetimeArrayMixin + + how = libperiod._validate_end_alias(how) + + end = how == 'E' + if end: + if freq == 'B': + # roll forward to ensure we land on B date + adjust = Timedelta(1, 'D') - Timedelta(1, 'ns') + return self.to_timestamp(how='start') + adjust + else: + adjust = Timedelta(1, 'ns') + return (self + 1).to_timestamp(how='start') - adjust + + if freq is None: + base, mult = frequencies.get_freq_code(self.freq) + freq = frequencies.get_to_timestamp_base(base) + else: + freq = Period._maybe_convert_freq(freq) + + base, mult = frequencies.get_freq_code(freq) + new_data = self.asfreq(freq, how=how) + + new_data = libperiod.periodarr_to_dt64arr(new_data._ndarray_values, + base) + return DatetimeArrayMixin(new_data, freq='infer') + # ------------------------------------------------------------------ # Arithmetic Methods @@ -392,6 +447,15 @@ def 
_maybe_convert_timedelta(self, other): # ------------------------------------------------------------------- # Constructor Helpers +def dt64arr_to_periodarr(data, freq, tz=None): + if data.dtype != np.dtype('M8[ns]'): + raise ValueError('Wrong dtype: %s' % data.dtype) + + freq = Period._maybe_convert_freq(freq) + base, mult = frequencies.get_freq_code(freq) + return libperiod.dt64arr_to_periodarr(data.view('i8'), base, tz) + + def _get_ordinal_range(start, end, periods, freq, mult=1): if com.count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index a6cdaa0c2163a..e40ceadc1a083 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -36,7 +36,7 @@ from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.numeric import Int64Index, Float64Index import pandas.compat as compat -from pandas.tseries.frequencies import to_offset, get_period_alias, Resolution +from pandas.tseries.frequencies import to_offset, Resolution from pandas.core.indexes.datetimelike import ( DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin) from pandas.tseries.offsets import ( @@ -302,7 +302,8 @@ def __new__(cls, data=None, tz=tz, normalize=normalize, closed=closed, ambiguous=ambiguous) - if not isinstance(data, (np.ndarray, Index, ABCSeries)): + if not isinstance(data, (np.ndarray, Index, ABCSeries, + DatetimeArrayMixin)): if is_scalar(data): raise ValueError('DatetimeIndex() must be called with a ' 'collection of some kind, %s was passed' @@ -673,67 +674,12 @@ def to_series(self, keep_tz=False, index=None, name=None): return Series(values, index=index, name=name) + @Appender(DatetimeArrayMixin.to_period.__doc__) def to_period(self, freq=None): - """ - Cast to PeriodIndex at a particular frequency. - - Converts DatetimeIndex to PeriodIndex. 
- - Parameters - ---------- - freq : string or Offset, optional - One of pandas' :ref:`offset strings <timeseries.offset_aliases>` - or an Offset object. Will be inferred by default. - - Returns - ------- - PeriodIndex - - Raises - ------ - ValueError - When converting a DatetimeIndex with non-regular values, so that a - frequency cannot be inferred. - - Examples - -------- - >>> df = pd.DataFrame({"y": [1,2,3]}, - ... index=pd.to_datetime(["2000-03-31 00:00:00", - ... "2000-05-31 00:00:00", - ... "2000-08-31 00:00:00"])) - >>> df.index.to_period("M") - PeriodIndex(['2000-03', '2000-05', '2000-08'], - dtype='period[M]', freq='M') - - Infer the daily frequency - - >>> idx = pd.date_range("2017-01-01", periods=2) - >>> idx.to_period() - PeriodIndex(['2017-01-01', '2017-01-02'], - dtype='period[D]', freq='D') - - See also - -------- - pandas.PeriodIndex: Immutable ndarray holding ordinal values - pandas.DatetimeIndex.to_pydatetime: Return DatetimeIndex as object - """ from pandas.core.indexes.period import PeriodIndex - if self.tz is not None: - warnings.warn("Converting to PeriodIndex representation will " - "drop timezone information.", UserWarning) - - if freq is None: - freq = self.freqstr or self.inferred_freq - - if freq is None: - msg = ("You must pass a freq argument as " - "current index has none.") - raise ValueError(msg) - - freq = get_period_alias(freq) - - return PeriodIndex(self.values, name=self.name, freq=freq) + result = DatetimeArrayMixin.to_period(self, freq=freq) + return PeriodIndex(result, name=self.name) def snap(self, freq='S'): """ @@ -758,6 +704,7 @@ def snap(self, freq='S'): # we know it conforms; skip check return DatetimeIndex(snapped, freq=freq, verify_integrity=False) + # TODO: what about self.name? if so, use shallow_copy? 
def unique(self, level=None): # Override here since IndexOpsMixin.unique uses self._values.unique @@ -769,8 +716,7 @@ def unique(self, level=None): else: naive = self result = super(DatetimeIndex, naive).unique(level=level) - return self._simple_new(result.values, name=self.name, tz=self.tz, - freq=self.freq) + return self._shallow_copy(result.values) def union(self, other): """ @@ -1421,8 +1367,7 @@ def insert(self, loc, item): try: new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8)) - return DatetimeIndex(new_dates, name=self.name, freq=freq, - tz=self.tz) + return self._shallow_copy(new_dates, freq=freq) except (AttributeError, TypeError): # fall back to object index @@ -1458,7 +1403,7 @@ def delete(self, loc): if (loc.start in (0, None) or loc.stop in (len(self), None)): freq = self.freq - return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz) + return self._shallow_copy(new_dates, freq=freq) def indexer_at_time(self, time, asof=False): """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index cc008694a8b84..7833dd851db34 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -17,7 +17,6 @@ pandas_dtype, ensure_object) -import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import get_freq_code as _gfc from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index @@ -25,13 +24,13 @@ from pandas.core.tools.datetimes import parse_time_string from pandas._libs.lib import infer_dtype -from pandas._libs import tslib, index as libindex, Timedelta +from pandas._libs import tslib, index as libindex from pandas._libs.tslibs.period import (Period, IncompatibleFrequency, - DIFFERENT_FREQ_INDEX, - _validate_end_alias) + DIFFERENT_FREQ_INDEX) from pandas._libs.tslibs import resolution, period -from pandas.core.arrays.period import PeriodArrayMixin +from pandas.core.arrays import datetimelike as dtl +from pandas.core.arrays.period 
import PeriodArrayMixin, dt64arr_to_periodarr from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs, ensure_index @@ -56,14 +55,6 @@ def f(self): return property(f) -def dt64arr_to_periodarr(data, freq, tz): - if data.dtype != np.dtype('M8[ns]'): - raise ValueError('Wrong dtype: %s' % data.dtype) - - freq = Period._maybe_convert_freq(freq) - base, mult = _gfc(freq) - return period.dt64arr_to_periodarr(data.view('i8'), base, tz) - # --- Period index sketch @@ -185,12 +176,7 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, raise TypeError('__new__() got an unexpected keyword argument {}'. format(list(set(fields) - valid_field_set)[0])) - if periods is not None: - if is_float(periods): - periods = int(periods) - elif not is_integer(periods): - msg = 'periods must be a number, got {periods}' - raise TypeError(msg.format(periods=periods)) + periods = dtl.validate_periods(periods) if name is None and hasattr(data, 'name'): name = data.name @@ -461,55 +447,23 @@ def is_full(self): daysinmonth = days_in_month @property + @Appender(PeriodArrayMixin.start_time.__doc__) def start_time(self): - return self.to_timestamp(how='start') + return PeriodArrayMixin.start_time.fget(self) @property + @Appender(PeriodArrayMixin.end_time.__doc__) def end_time(self): - return self.to_timestamp(how='end') + return PeriodArrayMixin.end_time.fget(self) def _mpl_repr(self): # how to represent ourselves to matplotlib return self.astype(object).values + @Appender(PeriodArrayMixin.to_timestamp.__doc__) def to_timestamp(self, freq=None, how='start'): - """ - Cast to DatetimeIndex - - Parameters - ---------- - freq : string or DateOffset, optional - Target frequency. 
The default is 'D' for week or longer, - 'S' otherwise - how : {'s', 'e', 'start', 'end'} - - Returns - ------- - DatetimeIndex - """ - how = _validate_end_alias(how) - - end = how == 'E' - if end: - if freq == 'B': - # roll forward to ensure we land on B date - adjust = Timedelta(1, 'D') - Timedelta(1, 'ns') - return self.to_timestamp(how='start') + adjust - else: - adjust = Timedelta(1, 'ns') - return (self + 1).to_timestamp(how='start') - adjust - - if freq is None: - base, mult = _gfc(self.freq) - freq = frequencies.get_to_timestamp_base(base) - else: - freq = Period._maybe_convert_freq(freq) - - base, mult = _gfc(freq) - new_data = self.asfreq(freq, how) - - new_data = period.periodarr_to_dt64arr(new_data._ndarray_values, base) - return DatetimeIndex(new_data, freq='infer', name=self.name) + result = PeriodArrayMixin.to_timestamp(self, freq=freq, how=how) + return DatetimeIndex(result, name=self.name) @property def inferred_type(self): diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 933bc6233dca9..ee604f44b98e0 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -668,7 +668,7 @@ def insert(self, loc, item): try: new_tds = np.concatenate((self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8)) - return TimedeltaIndex(new_tds, name=self.name, freq=freq) + return self._shallow_copy(new_tds, freq=freq) except (AttributeError, TypeError): diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 24f34884dc077..6bb4241451b3f 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1,13 +1,50 @@ # -*- coding: utf-8 -*- import numpy as np +import pytest import pandas as pd +import pandas.util.testing as tm from pandas.core.arrays.datetimes import DatetimeArrayMixin from pandas.core.arrays.timedeltas import TimedeltaArrayMixin from pandas.core.arrays.period import PeriodArrayMixin +# TODO: more 
freq variants +@pytest.fixture(params=['D', 'B', 'W', 'M', 'Q', 'Y']) +def period_index(request): + """ + A fixture to provide PeriodIndex objects with different frequencies. + + Most PeriodArray behavior is already tested in PeriodIndex tests, + so here we just test that the PeriodArray behavior matches + the PeriodIndex behavior. + """ + freqstr = request.param + # TODO: non-monotone indexes; NaTs, different start dates + pi = pd.period_range(start=pd.Timestamp('2000-01-01'), + periods=100, + freq=freqstr) + return pi + + +@pytest.fixture(params=['D', 'B', 'W', 'M', 'Q', 'Y']) +def datetime_index(request): + """ + A fixture to provide DatetimeIndex objects with different frequencies. + + Most DatetimeArray behavior is already tested in DatetimeIndex tests, + so here we just test that the DatetimeArray behavior matches + the DatetimeIndex behavior. + """ + freqstr = request.param + # TODO: non-monotone indexes; NaTs, different start dates, timezones + pi = pd.date_range(start=pd.Timestamp('2000-01-01'), + periods=100, + freq=freqstr) + return pi + + +class TestDatetimeArray(object): def test_from_dti(self, tz_naive_fixture): @@ -30,6 +67,41 @@ def test_astype_object(self, tz_naive_fixture): assert asobj.dtype == 'O' assert list(asobj) == list(dti) + @pytest.mark.parametrize('freqstr', ['D', 'B', 'W', 'M', 'Q', 'Y']) + def test_to_period(self, datetime_index, freqstr): + dti = datetime_index + arr = DatetimeArrayMixin(dti) + + expected = dti.to_period(freq=freqstr) + result = arr.to_period(freq=freqstr) + assert isinstance(result, PeriodArrayMixin) + + # placeholder until these become actual EA subclasses and we can use + # an EA-specific tm.assert_ function + tm.assert_index_equal(pd.Index(result), pd.Index(expected)) + + @pytest.mark.parametrize('propname', pd.DatetimeIndex._bool_ops) + def test_bool_properties(self, datetime_index, propname): + # in this case _bool_ops is just `is_leap_year` + dti = datetime_index + arr = DatetimeArrayMixin(dti) + assert dti.freq 
== arr.freq + + result = getattr(arr, propname) + expected = np.array(getattr(dti, propname), dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize('propname', pd.DatetimeIndex._field_ops) + def test_int_properties(self, datetime_index, propname): + dti = datetime_index + arr = DatetimeArrayMixin(dti) + + result = getattr(arr, propname) + expected = np.array(getattr(dti, propname), dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + class TestTimedeltaArray(object): def test_from_tdi(self): @@ -53,20 +125,54 @@ def test_astype_object(self): class TestPeriodArray(object): - def test_from_pi(self): - pi = pd.period_range('2016', freq='Q', periods=3) + def test_from_pi(self, period_index): + pi = period_index arr = PeriodArrayMixin(pi) assert list(arr) == list(pi) - # Check that Index.__new__ knows what to do with TimedeltaArray + # Check that Index.__new__ knows what to do with PeriodArray pi2 = pd.Index(arr) assert isinstance(pi2, pd.PeriodIndex) assert list(pi2) == list(arr) - def test_astype_object(self): - pi = pd.period_range('2016', freq='Q', periods=3) + def test_astype_object(self, period_index): + pi = period_index arr = PeriodArrayMixin(pi) asobj = arr.astype('O') assert isinstance(asobj, np.ndarray) assert asobj.dtype == 'O' assert list(asobj) == list(pi) + + @pytest.mark.parametrize('how', ['S', 'E']) + def test_to_timestamp(self, how, period_index): + pi = period_index + arr = PeriodArrayMixin(pi) + + expected = DatetimeArrayMixin(pi.to_timestamp(how=how)) + result = arr.to_timestamp(how=how) + assert isinstance(result, DatetimeArrayMixin) + + # placeholder until these become actual EA subclasses and we can use + # an EA-specific tm.assert_ function + tm.assert_index_equal(pd.Index(result), pd.Index(expected)) + + @pytest.mark.parametrize('propname', pd.PeriodIndex._bool_ops) + def test_bool_properties(self, period_index, propname): + # in this case _bool_ops is just `is_leap_year` + pi = 
period_index + arr = PeriodArrayMixin(pi) + + result = getattr(arr, propname) + expected = np.array(getattr(pi, propname)) + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize('propname', pd.PeriodIndex._field_ops) + def test_int_properties(self, period_index, propname): + pi = period_index + arr = PeriodArrayMixin(pi) + + result = getattr(arr, propname) + expected = np.array(getattr(pi, propname)) + + tm.assert_numpy_array_equal(result, expected)