Skip to content

Commit c2939ea

Browse files
committed
ENH/DOC: reimplement Series delegates/accessors using descriptors
This PR implements `Series.str`, `Series.dt` and `Series.cat` as descriptors instead of properties. As a result, the API docs can refer to methods such as `Series.str.lower` instead of `StringMethods.lower`, and tab-completion such as `Series.str.<tab>` also works, even on the base class. CC @jorisvandenbossche @jreback
1 parent 5fd1fbd commit c2939ea

File tree

9 files changed

+96
-21
lines changed

9 files changed

+96
-21
lines changed

doc/source/whatsnew/v0.16.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ Enhancements
107107

108108
- ``Timedelta`` will now accept nanoseconds keyword in constructor (:issue:`9273`)
109109

110+
- Added auto-complete for ``Series.str.<tab>``, ``Series.dt.<tab>`` and ``Series.cat.<tab>`` (:issue:`9322`)
111+
110112
Performance
111113
~~~~~~~~~~~
112114

pandas/core/base.py

+22
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,28 @@ def f(self, *args, **kwargs):
166166
if not hasattr(cls, name):
167167
setattr(cls,name,f)
168168

169+
170+
class AccessorProperty(object):
    """Read-only descriptor backing accessor namespaces such as Series.str.

    Accessed on the owning class, the attribute evaluates to the accessor
    class itself; accessed on an instance, it evaluates to the result of
    calling ``construct_accessor`` with that instance.

    Parameters
    ----------
    accessor_cls : type
        Returned for class-level access. Its docstring is copied onto the
        descriptor instance.
    construct_accessor : callable
        Called with the instance on every instance-level access; its return
        value is handed back to the caller.
    """

    def __init__(self, accessor_cls, construct_accessor):
        self.accessor_cls = accessor_cls
        self.construct_accessor = construct_accessor
        self.__doc__ = accessor_cls.__doc__

    def __get__(self, instance, owner=None):
        if instance is not None:
            return self.construct_accessor(instance)
        # class-level access: returning the accessor class ensures that
        # Series.str.<method> is well defined
        return self.accessor_cls

    def __set__(self, instance, value):
        # the accessor attribute cannot be rebound on an instance
        raise AttributeError("can't set attribute")

    def __delete__(self, instance):
        # ... nor removed from one
        raise AttributeError("can't delete attribute")
189+
190+
169191
class FrozenList(PandasObject, list):
170192

171193
"""

pandas/core/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,7 @@ def searchsorted(self, v, side='left', sorter=None):
829829
array([3, 4]) # eggs before milk
830830
>>> x = pd.Categorical(['apple', 'bread', 'bread', 'cheese', 'milk', 'donuts' ])
831831
>>> x.searchsorted(['bread', 'eggs'], side='right', sorter=[0, 1, 2, 3, 5, 4])
832-
array([3, 5]) # eggs after donuts, after switching milk and donuts
832+
array([3, 5]) # eggs after donuts, after switching milk and donuts
833833
"""
834834
if not self.ordered:
835835
raise ValueError("searchsorted requires an ordered Categorical.")

pandas/core/series.py

+18-12
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@
2727
from pandas.core.indexing import _check_bool_indexer, _maybe_convert_indices
2828
from pandas.core import generic, base
2929
from pandas.core.internals import SingleBlockManager
30-
from pandas.core.categorical import Categorical
30+
from pandas.core.categorical import Categorical, CategoricalAccessor
31+
from pandas.core.strings import StringMethods
32+
from pandas.tseries.common import (maybe_to_datetimelike,
33+
CombinedDatetimelikeProperties)
3134
from pandas.tseries.index import DatetimeIndex
3235
from pandas.tseries.tdi import TimedeltaIndex
3336
from pandas.tseries.period import PeriodIndex, Period
@@ -2452,11 +2455,6 @@ def asof(self, where):
24522455
new_values = com.take_1d(values, locs)
24532456
return self._constructor(new_values, index=where).__finalize__(self)
24542457

2455-
@cache_readonly
2456-
def str(self):
2457-
from pandas.core.strings import StringMethods
2458-
return StringMethods(self)
2459-
24602458
def to_timestamp(self, freq=None, how='start', copy=True):
24612459
"""
24622460
Cast to datetimeindex of timestamps, at *beginning* of period
@@ -2502,27 +2500,35 @@ def to_period(self, freq=None, copy=True):
25022500
return self._constructor(new_values,
25032501
index=new_index).__finalize__(self)
25042502

2503+
#------------------------------------------------------------------------------
2504+
# string methods
2505+
2506+
def _make_str_accessor(self):
    """Build the ``.str`` accessor object wrapping this Series."""
    accessor = StringMethods(self)
    return accessor

# Series.str on the class evaluates to StringMethods; on an instance it
# goes through _make_str_accessor
str = base.AccessorProperty(StringMethods, _make_str_accessor)
2510+
25052511
#------------------------------------------------------------------------------
25062512
# Datetimelike delegation methods
25072513

2508-
@cache_readonly
2509-
def dt(self):
2510-
from pandas.tseries.common import maybe_to_datetimelike
2514+
def _make_dt_accessor(self):
    """Build the ``.dt`` accessor object for this Series.

    Raises
    ------
    TypeError
        If ``maybe_to_datetimelike`` raises any exception for this Series.
    """
    try:
        accessor = maybe_to_datetimelike(self)
    except Exception:
        # any failure in the conversion is reported as a TypeError
        raise TypeError("Can only use .dt accessor with datetimelike values")
    return accessor

# Series.dt on the class evaluates to CombinedDatetimelikeProperties; on an
# instance it goes through _make_dt_accessor
dt = base.AccessorProperty(CombinedDatetimelikeProperties, _make_dt_accessor)
2521+
25162522
#------------------------------------------------------------------------------
25172523
# Categorical methods
25182524

2519-
@cache_readonly
2520-
def cat(self):
2521-
from pandas.core.categorical import CategoricalAccessor
2525+
def _make_cat_accessor(self):
    """Build the ``.cat`` accessor object for this Series.

    Raises
    ------
    TypeError
        If the dtype of this Series is not categorical.
    """
    if com.is_categorical_dtype(self.dtype):
        return CategoricalAccessor(self.values, self.index)
    raise TypeError("Can only use .cat accessor with a 'category' dtype")

# Series.cat on the class evaluates to CategoricalAccessor; on an instance it
# goes through _make_cat_accessor
cat = base.AccessorProperty(CategoricalAccessor, _make_cat_accessor)
2531+
25262532
Series._setup_axes(['index'], info_axis=0, stat_axis=0,
25272533
aliases={'rows': 0})
25282534
Series._add_numeric_operations()

pandas/core/strings.py

+15-2
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22

33
from pandas.compat import zip
44
from pandas.core.common import isnull, _values_from_object
5-
from pandas.core.series import Series
6-
from pandas.core.frame import DataFrame
75
import pandas.compat as compat
86
import re
97
import pandas.lib as lib
@@ -12,6 +10,8 @@
1210

1311

1412
def _get_array_list(arr, others):
13+
from pandas.core.series import Series
14+
1515
if len(others) and isinstance(_values_from_object(others)[0],
1616
(list, np.ndarray, Series)):
1717
arrays = [arr] + list(others)
@@ -95,6 +95,8 @@ def _na_map(f, arr, na_result=np.nan, dtype=object):
9595

9696

9797
def _map(f, arr, na_mask=False, na_value=np.nan, dtype=object):
98+
from pandas.core.series import Series
99+
98100
if not len(arr):
99101
return np.ndarray(0, dtype=dtype)
100102

@@ -459,6 +461,9 @@ def str_extract(arr, pat, flags=0):
459461
2 NaN NaN
460462
461463
"""
464+
from pandas.core.series import Series
465+
from pandas.core.frame import DataFrame
466+
462467
regex = re.compile(pat, flags=flags)
463468
# just to be safe, check this
464469
if regex.groups == 0:
@@ -510,6 +515,8 @@ def str_get_dummies(arr, sep='|'):
510515
See also ``pd.get_dummies``.
511516
512517
"""
518+
from pandas.core.frame import DataFrame
519+
513520
# TODO remove this hack?
514521
arr = arr.fillna('')
515522
try:
@@ -643,6 +650,9 @@ def str_split(arr, pat=None, n=None, return_type='series'):
643650
-------
644651
split : array
645652
"""
653+
from pandas.core.series import Series
654+
from pandas.core.frame import DataFrame
655+
646656
if return_type not in ('series', 'frame'):
647657
raise ValueError("return_type must be {'series', 'frame'}")
648658
if pat is None:
@@ -949,6 +959,9 @@ def __iter__(self):
949959
g = self.get(i)
950960

951961
def _wrap_result(self, result):
962+
from pandas.core.series import Series
963+
from pandas.core.frame import DataFrame
964+
952965
if not hasattr(result, 'ndim'):
953966
return result
954967
elif result.ndim == 1:

pandas/tests/test_categorical.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -909,8 +909,8 @@ def test_searchsorted(self):
909909
exp = np.array([1])
910910
self.assert_numpy_array_equal(res, exp)
911911
self.assert_numpy_array_equal(res, chk)
912-
913-
# Searching for a value that is not present in the Categorical
912+
913+
# Searching for a value that is not present in the Categorical
914914
res = c1.searchsorted(['bread', 'eggs'])
915915
chk = s1.searchsorted(['bread', 'eggs'])
916916
exp = np.array([1, 4])
@@ -927,7 +927,7 @@ def test_searchsorted(self):
927927
# As above, but with a sorter array to reorder an unsorted array
928928
res = c2.searchsorted(['bread', 'eggs'], side='right', sorter=[0, 1, 2, 3, 5, 4])
929929
chk = s2.searchsorted(['bread', 'eggs'], side='right', sorter=[0, 1, 2, 3, 5, 4])
930-
exp = np.array([3, 5]) # eggs after donuts, after switching milk and donuts
930+
exp = np.array([3, 5]) # eggs after donuts, after switching milk and donuts
931931
self.assert_numpy_array_equal(res, exp)
932932
self.assert_numpy_array_equal(res, chk)
933933

@@ -2516,6 +2516,13 @@ def get_dir(s):
25162516
results = get_dir(s)
25172517
tm.assert_almost_equal(results,list(sorted(set(ok_for_cat))))
25182518

2519+
def test_cat_accessor_api(self):
    # GH 9322
    from pandas.core.categorical import CategoricalAccessor

    # on the class, .cat is the accessor class itself
    self.assertIs(Series.cat, CategoricalAccessor)

    # on a categorical Series, .cat is an accessor instance
    categorical = Series(list('aabbcde')).astype('category')
    self.assertIsInstance(categorical.cat, CategoricalAccessor)
2525+
25192526
def test_pickle_v0_14_1(self):
25202527
cat = pd.Categorical(values=['a', 'b', 'c'],
25212528
categories=['a', 'b', 'c', 'd'],

pandas/tests/test_series.py

+9
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,15 @@ def test_valid_dt_with_missing_values(self):
231231
expected = Series([time(0),time(0),np.nan,time(0),time(0)],dtype='object')
232232
tm.assert_series_equal(result, expected)
233233

234+
def test_dt_accessor_api(self):
    # GH 9322
    from pandas.tseries.common import (CombinedDatetimelikeProperties,
                                       DatetimeProperties)

    # on the class, .dt is the combined properties class
    self.assertIs(Series.dt, CombinedDatetimelikeProperties)

    # on a datetime Series, .dt is the datetime-specific accessor
    datetimes = Series(date_range('2000-01-01', periods=3))
    self.assertIsInstance(datetimes.dt, DatetimeProperties)
242+
234243
def test_binop_maybe_preserve_name(self):
235244

236245
# names match, preserve

pandas/tests/test_strings.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@ class TestStringMethods(tm.TestCase):
3232

3333
def test_api(self):
3434

35-
# GH 6106
36-
self.assertIsNone(Series.str)
35+
# GH 6106, GH 9322
36+
self.assertIs(Series.str, strings.StringMethods)
37+
self.assertIsInstance(Series(['']).str, strings.StringMethods)
3738

3839
def test_iter(self):
3940
# GH3638

pandas/tseries/common.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import numpy as np
44
from pandas.core.base import PandasDelegate
55
from pandas.core import common as com
6-
from pandas import Series, DatetimeIndex, PeriodIndex, TimedeltaIndex
6+
from pandas.tseries.index import DatetimeIndex
7+
from pandas.tseries.period import PeriodIndex
8+
from pandas.tseries.tdi import TimedeltaIndex
79
from pandas import lib, tslib
810
from pandas.core.common import (_NS_DTYPE, _TD_DTYPE, is_period_arraylike,
911
is_datetime_arraylike, is_integer_dtype, is_list_like,
@@ -35,6 +37,7 @@ def maybe_to_datetimelike(data, copy=False):
3537
DelegatedClass
3638
3739
"""
40+
from pandas import Series
3841

3942
if not isinstance(data, Series):
4043
raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data)))
@@ -59,6 +62,8 @@ def __init__(self, values, index):
5962
self.index = index
6063

6164
def _delegate_property_get(self, name):
65+
from pandas import Series
66+
6267
result = getattr(self.values,name)
6368

6469
# maybe need to upcast (ints)
@@ -82,6 +87,8 @@ def _delegate_property_set(self, name, value, *args, **kwargs):
8287
"supported. Change values on the original.")
8388

8489
def _delegate_method(self, name, *args, **kwargs):
90+
from pandas import Series
91+
8592
method = getattr(self.values, name)
8693
result = method(*args, **kwargs)
8794

@@ -175,6 +182,14 @@ class PeriodProperties(Properties):
175182
accessors=PeriodIndex._datetimelike_ops,
176183
typ='property')
177184

185+
186+
class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties):
    # This class is never instantiated, and exists solely for the benefit of
    # the Series.dt class property. For Series objects, .dt will always be one
    # of the more specific classes above.
    # The docstring is borrowed from DatetimeProperties rather than written
    # separately for this class.
    __doc__ = DatetimeProperties.__doc__
191+
192+
178193
def _concat_compat(to_concat, axis=0):
179194
"""
180195
provide concatenation of an datetimelike array of arrays each of which is a single

0 commit comments

Comments
 (0)