Skip to content

Commit 0324465

Browse files
jbrockmendel authored and
jreback committed
Mix EA into DTA/TDA; part of 24024 (pandas-dev#24502)
1 parent fce9ccf commit 0324465

File tree

4 files changed

+74
-27
lines changed

4 files changed

+74
-27
lines changed

pandas/core/arrays/datetimelike.py

+46-7
Original file line number | Diff line number | Diff line change
@@ -28,13 +28,14 @@
2828
from pandas.core.dtypes.missing import isna
2929

3030
from pandas.core import nanops
31-
from pandas.core.algorithms import checked_add_with_arr, take, unique1d
31+
from pandas.core.algorithms import (
32+
checked_add_with_arr, take, unique1d, value_counts)
3233
import pandas.core.common as com
3334

3435
from pandas.tseries import frequencies
3536
from pandas.tseries.offsets import DateOffset, Tick
3637

37-
from .base import ExtensionOpsMixin
38+
from .base import ExtensionArray, ExtensionOpsMixin
3839

3940

4041
def _make_comparison_op(cls, op):
@@ -343,7 +344,9 @@ def ceil(self, freq, ambiguous='raise', nonexistent='raise'):
343344
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)
344345

345346

346-
class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin):
347+
class DatetimeLikeArrayMixin(ExtensionOpsMixin,
348+
AttributesMixin,
349+
ExtensionArray):
347350
"""
348351
Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray
349352
@@ -701,6 +704,43 @@ def repeat(self, repeats, *args, **kwargs):
701704
values = self._data.repeat(repeats)
702705
return type(self)(values.view('i8'), dtype=self.dtype)
703706

707+
def value_counts(self, dropna=False):
708+
"""
709+
Return a Series containing counts of unique values.
710+
711+
Parameters
712+
----------
713+
dropna : boolean, default False
714+
Don't include counts of NaT values.
715+
716+
Returns
717+
-------
718+
Series
719+
"""
720+
from pandas import Series, Index
721+
722+
if dropna:
723+
values = self[~self.isna()]._data
724+
else:
725+
values = self._data
726+
727+
cls = type(self)
728+
729+
result = value_counts(values, sort=False, dropna=dropna)
730+
index = Index(cls(result.index.view('i8'), dtype=self.dtype),
731+
name=result.index.name)
732+
return Series(result.values, index=index, name=result.name)
733+
734+
def map(self, mapper):
735+
# TODO(GH-23179): Add ExtensionArray.map
736+
# Need to figure out if we want ExtensionArray.map first.
737+
# If so, then we can refactor IndexOpsMixin._map_values to
738+
# a standalone function and call from here.
739+
# Else, just rewrite _map_infer_values to do the right thing.
740+
from pandas import Index
741+
742+
return Index(self).map(mapper).array
743+
704744
# ------------------------------------------------------------------
705745
# Null Handling
706746

@@ -1357,10 +1397,9 @@ def _reduce(self, name, axis=0, skipna=True, **kwargs):
13571397
if op:
13581398
return op(axis=axis, skipna=skipna, **kwargs)
13591399
else:
1360-
raise TypeError("cannot perform {name} with type {dtype}"
1361-
.format(name=name, dtype=self.dtype))
1362-
# TODO: use super(DatetimeLikeArrayMixin, self)._reduce
1363-
# after we subclass ExtensionArray
1400+
return super(DatetimeLikeArrayMixin, self)._reduce(
1401+
name, skipna, **kwargs
1402+
)
13641403

13651404
def min(self, axis=None, skipna=True, *args, **kwargs):
13661405
"""

pandas/core/arrays/period.py

+2-19
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@
2222
from pandas.core.dtypes.missing import isna, notna
2323

2424
import pandas.core.algorithms as algos
25-
from pandas.core.arrays import ExtensionArray, datetimelike as dtl
25+
from pandas.core.arrays import datetimelike as dtl
2626
import pandas.core.common as com
2727
from pandas.core.missing import backfill_1d, pad_1d
2828

@@ -92,9 +92,7 @@ def wrapper(self, other):
9292
return compat.set_function_name(wrapper, opname, cls)
9393

9494

95-
class PeriodArray(dtl.DatetimeLikeArrayMixin,
96-
dtl.DatelikeOps,
97-
ExtensionArray):
95+
class PeriodArray(dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps):
9896
"""
9997
Pandas ExtensionArray for storing Period data.
10098
@@ -418,21 +416,6 @@ def fillna(self, value=None, method=None, limit=None):
418416
new_values = self.copy()
419417
return new_values
420418

421-
def value_counts(self, dropna=False):
422-
from pandas import Series, PeriodIndex
423-
424-
if dropna:
425-
values = self[~self.isna()]._data
426-
else:
427-
values = self._data
428-
429-
cls = type(self)
430-
431-
result = algos.value_counts(values, sort=False)
432-
index = PeriodIndex(cls(result.index, freq=self.freq),
433-
name=result.index.name)
434-
return Series(result.values, index=index, name=result.name)
435-
436419
# --------------------------------------------------------------------
437420

438421
def _time_shift(self, n, freq=None):

pandas/core/arrays/timedeltas.py

+11-1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
from pandas.core.dtypes.common import (
1818
_NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
19-
is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
19+
is_int64_dtype, is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
2020
is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype,
2121
pandas_dtype)
2222
from pandas.core.dtypes.dtypes import DatetimeTZDtype
@@ -244,6 +244,16 @@ def _maybe_clear_freq(self):
244244
# ----------------------------------------------------------------
245245
# Array-Like / EA-Interface Methods
246246

247+
def __array__(self, dtype=None):
248+
# TODO(https://github.com/pandas-dev/pandas/pull/23593)
249+
# Maybe push to parent once datetimetz __array__ is figured out.
250+
if is_object_dtype(dtype):
251+
return np.array(list(self), dtype=object)
252+
elif is_int64_dtype(dtype):
253+
return self.asi8
254+
255+
return self._data
256+
247257
@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
248258
def _validate_fill_value(self, fill_value):
249259
if isna(fill_value):

pandas/tests/arrays/test_datetimes.py

+15
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,21 @@ def test_repeat_preserves_tz(self):
123123
expected = DatetimeArray(arr.asi8, freq=None, tz=arr.tz)
124124
tm.assert_equal(repeated, expected)
125125

126+
def test_value_counts_preserves_tz(self):
127+
dti = pd.date_range('2000', periods=2, freq='D', tz='US/Central')
128+
arr = DatetimeArray(dti).repeat([4, 3])
129+
130+
result = arr.value_counts()
131+
132+
# Note: not tm.assert_index_equal, since `freq`s do not match
133+
assert result.index.equals(dti)
134+
135+
arr[-2] = pd.NaT
136+
result = arr.value_counts()
137+
expected = pd.Series([1, 4, 2],
138+
index=[pd.NaT, dti[0], dti[1]])
139+
tm.assert_series_equal(result, expected)
140+
126141

127142
class TestSequenceToDT64NS(object):
128143

0 commit comments

Comments
 (0)