Skip to content

Commit a7c65ed

Browse files
committed
Make .dt available for Series of type category with datetimes
If a Series has dtype category and the underlying Categorical has categories of a datetimelike type, make it possible to use the .dt accessor on that Series. The datetime methods operate on the categories (and are therefore fast when there are only a few categories), but they return a Series with a dtype other than category (integer, ...), so the result is the same whether .dt is used on a Series of dtype datetime or of dtype category.
1 parent 149feef commit a7c65ed

File tree

3 files changed

+107
-10
lines changed

3 files changed

+107
-10
lines changed

pandas/tests/test_categorical.py

+75
Original file line numberDiff line numberDiff line change
@@ -3712,6 +3712,81 @@ def test_str_accessor_api_for_categorical(self):
37123712
invalid.str
37133713
self.assertFalse(hasattr(invalid, 'str'))
37143714

3715+
def test_dt_accessor_api_for_categorical(self):
    """Check that ``.dt`` on a categorical Series mirrors the plain Series.

    For a tz-aware datetime, a period and a timedelta Series, each is
    converted with ``astype("category")`` and every public method and
    attribute reachable through ``.dt`` is compared between the two.
    Finally, a categorical with integer categories must not expose ``.dt``.
    """
    # https://github.com/pydata/pandas/issues/10661
    from pandas.tseries.common import Properties
    from pandas.tseries.index import date_range, DatetimeIndex
    from pandas.tseries.period import period_range, PeriodIndex
    from pandas.tseries.tdi import timedelta_range, TimedeltaIndex

    def assert_same(res, exp):
        # Pick the comparison helper that matches the result's type.
        if isinstance(res, pd.DataFrame):
            tm.assert_frame_equal(res, exp)
        elif isinstance(res, pd.Series):
            tm.assert_series_equal(res, exp)
        else:
            tm.assert_numpy_array_equal(res, exp)

    s_dr = Series(date_range('1/1/2015', periods=5, tz="MET"))
    c_dr = s_dr.astype("category")

    s_pr = Series(period_range('1/1/2015', freq='D', periods=5))
    c_pr = s_pr.astype("category")

    s_tdr = Series(timedelta_range('1 days', '10 days'))
    c_tdr = s_tdr.astype("category")

    # (attribute names of the index type, plain Series, categorical Series)
    test_data = [
        (DatetimeIndex._datetimelike_ops, s_dr, c_dr),
        (PeriodIndex._datetimelike_ops, s_pr, c_pr),
        (TimedeltaIndex._datetimelike_ops, s_tdr, c_tdr),
    ]

    self.assertIsInstance(c_dr.dt, Properties)

    # Methods that cannot be called without arguments.
    special_func_defs = [
        ('strftime', ("%Y-%m-%d",), {}),
        ('tz_convert', ("EST",), {}),
    ]
    _special_func_names = [f[0] for f in special_func_defs]

    # the series is already localized
    _ignore_names = ['tz_localize']

    for attr_names, s, c in test_data:
        dt_names = dir(s.dt)
        skipped = (set(attr_names) | set(_special_func_names) |
                   set(_ignore_names))

        # Everything public that is neither an attribute nor handled
        # separately is treated as a zero-argument method.
        func_defs = [(f, (), {}) for f in dt_names
                     if not f.startswith("_") and f not in skipped]
        func_defs.extend(f_def for f_def in special_func_defs
                         if f_def[0] in dt_names)

        for func, args, kwargs in func_defs:
            res = getattr(c.dt, func)(*args, **kwargs)
            exp = getattr(s.dt, func)(*args, **kwargs)
            assert_same(res, exp)

        for attr in attr_names:
            res = getattr(c.dt, attr)
            exp = getattr(s.dt, attr)
            assert_same(res, exp)

    invalid = Series([1, 2, 3]).astype('category')
    with tm.assertRaisesRegexp(
            AttributeError,
            "Can only use .dt accessor with datetimelike"):
        invalid.dt
    # hasattr() is False because accessing .dt raises AttributeError.
    self.assertFalse(hasattr(invalid, 'dt'))
3789+
37153790
def test_pickle_v0_14_1(self):
37163791

37173792
# we have the name warning

pandas/tests/test_series.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -362,12 +362,20 @@ def test_tab_completion(self):
362362
self.assertTrue('str' not in dir(s))
363363
self.assertTrue('cat' not in dir(s))
364364

365-
# similiarly for .cat
365+
# similarly for .cat, but with the twist that str and dt should be there
366+
# if the categories are of that type
367+
# first cat and str
366368
s = Series(list('abbcd'), dtype="category")
367369
self.assertTrue('cat' in dir(s))
368-
self.assertTrue('str' not in dir(s))
370+
self.assertTrue('str' in dir(s)) # as it is a string categorical
369371
self.assertTrue('dt' not in dir(s))
370372

373+
# similar to cat and str
374+
s = Series(date_range('1/1/2015', periods=5)).astype("category")
375+
self.assertTrue('cat' in dir(s))
376+
self.assertTrue('str' not in dir(s))
377+
self.assertTrue('dt' in dir(s)) # as it is a datetime categorical
378+
371379
def test_binop_maybe_preserve_name(self):
372380
# names match, preserve
373381
result = self.ts * self.ts

pandas/tseries/common.py

+22-8
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
from pandas.core.common import (_NS_DTYPE, _TD_DTYPE, is_period_arraylike,
1111
is_datetime_arraylike, is_integer_dtype, is_list_like,
1212
is_datetime64_dtype, is_datetime64tz_dtype,
13-
is_timedelta64_dtype,
14-
get_dtype_kinds)
13+
is_timedelta64_dtype, is_categorical_dtype,
14+
get_dtype_kinds, take_1d)
1515

1616
def is_datetimelike(data):
1717
""" return a boolean if we can be successfully converted to a datetimelike """
@@ -45,26 +45,36 @@ def maybe_to_datetimelike(data, copy=False):
4545
raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))
4646

4747
index = data.index
48+
name = data.name
49+
orig = data if is_categorical_dtype(data) else None
50+
if orig is not None:
51+
data = orig.values.categories
52+
4853
if is_datetime64_dtype(data.dtype):
49-
return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index, name=data.name)
54+
return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index, name=name,
55+
orig=orig)
5056
elif is_datetime64tz_dtype(data.dtype):
51-
return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer', ambiguous='infer'), index, name=data.name)
57+
return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer', ambiguous='infer'),
58+
index, data.name, orig=orig)
5259
elif is_timedelta64_dtype(data.dtype):
53-
return TimedeltaProperties(TimedeltaIndex(data, copy=copy, freq='infer'), index, name=data.name)
60+
return TimedeltaProperties(TimedeltaIndex(data, copy=copy, freq='infer'), index,
61+
name=name, orig=orig)
5462
else:
5563
if is_period_arraylike(data):
56-
return PeriodProperties(PeriodIndex(data, copy=copy), index, name=data.name)
64+
return PeriodProperties(PeriodIndex(data, copy=copy), index, name=name, orig=orig)
5765
if is_datetime_arraylike(data):
58-
return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index, name=data.name)
66+
return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index,
67+
name=name, orig=orig)
5968

6069
raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))
6170

6271
class Properties(PandasDelegate, NoNewAttributesMixin):
6372

64-
def __init__(self, values, index, name):
73+
def __init__(self, values, index, name, orig=None):
    """Record the delegate target and result metadata, then freeze.

    Parameters
    ----------
    values : object the accessor delegates to
    index : index given to Series results built by the accessor
    name : name given to Series results built by the accessor
    orig : optional, default None
        When set, results computed on ``values`` are expanded through
        ``orig.cat.codes`` before being wrapped in a Series.
    """
    self.orig = orig
    self.name = name
    self.index = index
    self.values = values
    # NOTE(review): presumably _freeze() blocks new attributes from here
    # on, so it has to stay the last call -- confirm against the mixin.
    self._freeze()
6979

7080
def _delegate_property_get(self, name):
@@ -79,6 +89,10 @@ def _delegate_property_get(self, name):
7989
elif not is_list_like(result):
8090
return result
8191

92+
# blow up if we operate on categories
93+
if self.orig is not None:
94+
result = take_1d(result, self.orig.cat.codes)
95+
8296
# return the result as a Series, which is by definition a copy
8397
result = Series(result, index=self.index, name=self.name)
8498

0 commit comments

Comments
 (0)