Skip to content

Commit a047367

Browse files
jbrockmendel and tm9k1
authored and committed
CLN: Move some PI/DTI methods to EA subclasses, implement tests (pandas-dev#22961)
1 parent f20d917 commit a047367

File tree

6 files changed

+266
-132
lines changed

6 files changed

+266
-132
lines changed

pandas/core/arrays/datetimes.py

+66 -1
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
from pandas.core.algorithms import checked_add_with_arr
3232
from pandas.core import ops
3333

34-
from pandas.tseries.frequencies import to_offset
34+
from pandas.tseries.frequencies import to_offset, get_period_alias
3535
from pandas.tseries.offsets import Tick, generate_range
3636

3737
from pandas.core.arrays import datetimelike as dtl
@@ -200,6 +200,10 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):
200200
# e.g. DatetimeIndex
201201
tz = values.tz
202202

203+
if freq is None and hasattr(values, "freq"):
204+
# i.e. DatetimeArray, DatetimeIndex
205+
freq = values.freq
206+
203207
freq, freq_infer = dtl.maybe_infer_freq(freq)
204208

205209
# if dtype has an embedded tz, capture it
@@ -764,6 +768,67 @@ def normalize(self):
764768
new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz)
765769
return type(self)(new_values, freq='infer').tz_localize(self.tz)
766770

771+
def to_period(self, freq=None):
772+
"""
773+
Cast to PeriodArray/Index at a particular frequency.
774+
775+
Converts DatetimeArray/Index to PeriodArray/Index.
776+
777+
Parameters
778+
----------
779+
freq : string or Offset, optional
780+
One of pandas' :ref:`offset strings <timeseries.offset_aliases>`
781+
or an Offset object. Will be inferred by default.
782+
783+
Returns
784+
-------
785+
PeriodArray/Index
786+
787+
Raises
788+
------
789+
ValueError
790+
When converting a DatetimeArray/Index with non-regular values,
791+
so that a frequency cannot be inferred.
792+
793+
Examples
794+
--------
795+
>>> df = pd.DataFrame({"y": [1,2,3]},
796+
... index=pd.to_datetime(["2000-03-31 00:00:00",
797+
... "2000-05-31 00:00:00",
798+
... "2000-08-31 00:00:00"]))
799+
>>> df.index.to_period("M")
800+
PeriodIndex(['2000-03', '2000-05', '2000-08'],
801+
dtype='period[M]', freq='M')
802+
803+
Infer the daily frequency
804+
805+
>>> idx = pd.date_range("2017-01-01", periods=2)
806+
>>> idx.to_period()
807+
PeriodIndex(['2017-01-01', '2017-01-02'],
808+
dtype='period[D]', freq='D')
809+
810+
See also
811+
--------
812+
pandas.PeriodIndex: Immutable ndarray holding ordinal values
813+
pandas.DatetimeIndex.to_pydatetime: Return DatetimeIndex as object
814+
"""
815+
from pandas.core.arrays.period import PeriodArrayMixin
816+
817+
if self.tz is not None:
818+
warnings.warn("Converting to PeriodArray/Index representation "
819+
"will drop timezone information.", UserWarning)
820+
821+
if freq is None:
822+
freq = self.freqstr or self.inferred_freq
823+
824+
if freq is None:
825+
raise ValueError("You must pass a freq argument as "
826+
"current index has none.")
827+
828+
freq = get_period_alias(freq)
829+
830+
return PeriodArrayMixin(self.values, freq=freq)
831+
767832
# -----------------------------------------------------------------
768833
# Properties - Vectorized Timestamp Properties/Methods
769834

pandas/core/arrays/period.py

+66 -2
Original file line number | Diff line number | Diff line change
@@ -10,14 +10,15 @@
1010
Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX,
1111
get_period_field_arr, period_asfreq_arr)
1212
from pandas._libs.tslibs import period as libperiod
13-
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
13+
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta
1414
from pandas._libs.tslibs.fields import isleapyear_arr
1515

1616
from pandas import compat
1717
from pandas.util._decorators import (cache_readonly, deprecate_kwarg)
1818

1919
from pandas.core.dtypes.common import (
20-
is_integer_dtype, is_float_dtype, is_period_dtype)
20+
is_integer_dtype, is_float_dtype, is_period_dtype,
21+
is_datetime64_dtype)
2122
from pandas.core.dtypes.dtypes import PeriodDtype
2223
from pandas.core.dtypes.generic import ABCSeries
2324

@@ -127,6 +128,10 @@ def __new__(cls, values, freq=None, **kwargs):
127128
freq = values.freq
128129
values = values.asi8
129130

131+
elif is_datetime64_dtype(values):
132+
# TODO: what if it has tz?
133+
values = dt64arr_to_periodarr(values, freq)
134+
130135
return cls._simple_new(values, freq, **kwargs)
131136

132137
@classmethod
@@ -207,6 +212,14 @@ def is_leap_year(self):
207212
""" Logical indicating if the date belongs to a leap year """
208213
return isleapyear_arr(np.asarray(self.year))
209214

215+
@property
216+
def start_time(self):
217+
return self.to_timestamp(how='start')
218+
219+
@property
220+
def end_time(self):
221+
return self.to_timestamp(how='end')
222+
210223
def asfreq(self, freq=None, how='E'):
211224
"""
212225
Convert the Period Array/Index to the specified frequency `freq`.
@@ -266,6 +279,48 @@ def asfreq(self, freq=None, how='E'):
266279

267280
return self._shallow_copy(new_data, freq=freq)
268281

282+
def to_timestamp(self, freq=None, how='start'):
283+
"""
284+
Cast to DatetimeArray/Index
285+
286+
Parameters
287+
----------
288+
freq : string or DateOffset, optional
289+
Target frequency. The default is 'D' for week or longer,
290+
'S' otherwise
291+
how : {'s', 'e', 'start', 'end'}
292+
293+
Returns
294+
-------
295+
DatetimeArray/Index
296+
"""
297+
from pandas.core.arrays.datetimes import DatetimeArrayMixin
298+
299+
how = libperiod._validate_end_alias(how)
300+
301+
end = how == 'E'
302+
if end:
303+
if freq == 'B':
304+
# roll forward to ensure we land on B date
305+
adjust = Timedelta(1, 'D') - Timedelta(1, 'ns')
306+
return self.to_timestamp(how='start') + adjust
307+
else:
308+
adjust = Timedelta(1, 'ns')
309+
return (self + 1).to_timestamp(how='start') - adjust
310+
311+
if freq is None:
312+
base, mult = frequencies.get_freq_code(self.freq)
313+
freq = frequencies.get_to_timestamp_base(base)
314+
else:
315+
freq = Period._maybe_convert_freq(freq)
316+
317+
base, mult = frequencies.get_freq_code(freq)
318+
new_data = self.asfreq(freq, how=how)
319+
320+
new_data = libperiod.periodarr_to_dt64arr(new_data._ndarray_values,
321+
base)
322+
return DatetimeArrayMixin(new_data, freq='infer')
323+
269324
# ------------------------------------------------------------------
270325
# Arithmetic Methods
271326

@@ -407,6 +462,15 @@ def _maybe_convert_timedelta(self, other):
407462
# -------------------------------------------------------------------
408463
# Constructor Helpers
409464

465+
def dt64arr_to_periodarr(data, freq, tz=None):
466+
if data.dtype != np.dtype('M8[ns]'):
467+
raise ValueError('Wrong dtype: %s' % data.dtype)
468+
469+
freq = Period._maybe_convert_freq(freq)
470+
base, mult = frequencies.get_freq_code(freq)
471+
return libperiod.dt64arr_to_periodarr(data.view('i8'), base, tz)
472+
473+
410474
def _get_ordinal_range(start, end, periods, freq, mult=1):
411475
if com.count_not_none(start, end, periods) != 2:
412476
raise ValueError('Of the three parameters: start, end, and periods, '

pandas/core/indexes/datetimes.py

+10 -65
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
from pandas.core.indexes.base import Index, _index_shared_docs
3737
from pandas.core.indexes.numeric import Int64Index, Float64Index
3838
import pandas.compat as compat
39-
from pandas.tseries.frequencies import to_offset, get_period_alias, Resolution
39+
from pandas.tseries.frequencies import to_offset, Resolution
4040
from pandas.core.indexes.datetimelike import (
4141
DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin)
4242
from pandas.tseries.offsets import (
@@ -302,7 +302,8 @@ def __new__(cls, data=None,
302302
tz=tz, normalize=normalize,
303303
closed=closed, ambiguous=ambiguous)
304304

305-
if not isinstance(data, (np.ndarray, Index, ABCSeries)):
305+
if not isinstance(data, (np.ndarray, Index, ABCSeries,
306+
DatetimeArrayMixin)):
306307
if is_scalar(data):
307308
raise ValueError('DatetimeIndex() must be called with a '
308309
'collection of some kind, %s was passed'
@@ -673,67 +674,12 @@ def to_series(self, keep_tz=False, index=None, name=None):
673674

674675
return Series(values, index=index, name=name)
675676

677+
@Appender(DatetimeArrayMixin.to_period.__doc__)
676678
def to_period(self, freq=None):
677-
"""
678-
Cast to PeriodIndex at a particular frequency.
679-
680-
Converts DatetimeIndex to PeriodIndex.
681-
682-
Parameters
683-
----------
684-
freq : string or Offset, optional
685-
One of pandas' :ref:`offset strings <timeseries.offset_aliases>`
686-
or an Offset object. Will be inferred by default.
687-
688-
Returns
689-
-------
690-
PeriodIndex
691-
692-
Raises
693-
------
694-
ValueError
695-
When converting a DatetimeIndex with non-regular values, so that a
696-
frequency cannot be inferred.
697-
698-
Examples
699-
--------
700-
>>> df = pd.DataFrame({"y": [1,2,3]},
701-
... index=pd.to_datetime(["2000-03-31 00:00:00",
702-
... "2000-05-31 00:00:00",
703-
... "2000-08-31 00:00:00"]))
704-
>>> df.index.to_period("M")
705-
PeriodIndex(['2000-03', '2000-05', '2000-08'],
706-
dtype='period[M]', freq='M')
707-
708-
Infer the daily frequency
709-
710-
>>> idx = pd.date_range("2017-01-01", periods=2)
711-
>>> idx.to_period()
712-
PeriodIndex(['2017-01-01', '2017-01-02'],
713-
dtype='period[D]', freq='D')
714-
715-
See also
716-
--------
717-
pandas.PeriodIndex: Immutable ndarray holding ordinal values
718-
pandas.DatetimeIndex.to_pydatetime: Return DatetimeIndex as object
719-
"""
720679
from pandas.core.indexes.period import PeriodIndex
721680

722-
if self.tz is not None:
723-
warnings.warn("Converting to PeriodIndex representation will "
724-
"drop timezone information.", UserWarning)
725-
726-
if freq is None:
727-
freq = self.freqstr or self.inferred_freq
728-
729-
if freq is None:
730-
msg = ("You must pass a freq argument as "
731-
"current index has none.")
732-
raise ValueError(msg)
733-
734-
freq = get_period_alias(freq)
735-
736-
return PeriodIndex(self.values, name=self.name, freq=freq)
681+
result = DatetimeArrayMixin.to_period(self, freq=freq)
682+
return PeriodIndex(result, name=self.name)
737683

738684
def snap(self, freq='S'):
739685
"""
@@ -758,6 +704,7 @@ def snap(self, freq='S'):
758704

759705
# we know it conforms; skip check
760706
return DatetimeIndex(snapped, freq=freq, verify_integrity=False)
707+
# TODO: what about self.name? if so, use shallow_copy?
761708

762709
def unique(self, level=None):
763710
# Override here since IndexOpsMixin.unique uses self._values.unique
@@ -769,8 +716,7 @@ def unique(self, level=None):
769716
else:
770717
naive = self
771718
result = super(DatetimeIndex, naive).unique(level=level)
772-
return self._simple_new(result.values, name=self.name, tz=self.tz,
773-
freq=self.freq)
719+
return self._shallow_copy(result.values)
774720

775721
def union(self, other):
776722
"""
@@ -1421,8 +1367,7 @@ def insert(self, loc, item):
14211367
try:
14221368
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
14231369
self[loc:].asi8))
1424-
return DatetimeIndex(new_dates, name=self.name, freq=freq,
1425-
tz=self.tz)
1370+
return self._shallow_copy(new_dates, freq=freq)
14261371
except (AttributeError, TypeError):
14271372

14281373
# fall back to object index
@@ -1458,7 +1403,7 @@ def delete(self, loc):
14581403
if (loc.start in (0, None) or loc.stop in (len(self), None)):
14591404
freq = self.freq
14601405

1461-
return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)
1406+
return self._shallow_copy(new_dates, freq=freq)
14621407

14631408
def indexer_at_time(self, time, asof=False):
14641409
"""

0 commit comments

Comments
 (0)