From b77e103086ab71af19487d67884e2e52aab9e2ba Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 18 Jul 2017 22:25:31 -0700 Subject: [PATCH 01/15] Move PandasDelegate and AccessorProperty; update imports --- pandas/core/accessors.py | 91 ++++++++++++++++++++++++++++++++ pandas/core/base.py | 83 ----------------------------- pandas/core/categorical.py | 3 +- pandas/core/frame.py | 7 ++- pandas/core/indexes/accessors.py | 3 +- pandas/core/indexes/category.py | 6 +-- pandas/core/series.py | 10 ++-- pandas/core/strings.py | 3 +- pandas/tests/test_base.py | 3 +- 9 files changed, 111 insertions(+), 98 deletions(-) create mode 100644 pandas/core/accessors.py diff --git a/pandas/core/accessors.py b/pandas/core/accessors.py new file mode 100644 index 0000000000000..ed09ac51d1853 --- /dev/null +++ b/pandas/core/accessors.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from pandas.core.base import PandasObject + + +class PandasDelegate(PandasObject): + """ an abstract base class for delegating methods/properties """ + + def _delegate_property_get(self, name, *args, **kwargs): + raise TypeError("You cannot access the " + "property {name}".format(name=name)) + + def _delegate_property_set(self, name, value, *args, **kwargs): + raise TypeError("The property {name} cannot be set".format(name=name)) + + def _delegate_method(self, name, *args, **kwargs): + raise TypeError("You cannot call method {name}".format(name=name)) + + @classmethod + def _add_delegate_accessors(cls, delegate, accessors, typ, + overwrite=False): + """ + add accessors to cls from the delegate class + + Parameters + ---------- + cls : the class to add the methods/properties to + delegate : the class to get methods/properties & doc-strings + acccessors : string list of accessors to add + typ : 'property' or 'method' + overwrite : boolean, default False + overwrite the method/property in the target class if it exists + """ + + def _create_delegator_property(name): + + def _getter(self): 
+ return self._delegate_property_get(name) + + def _setter(self, new_values): + return self._delegate_property_set(name, new_values) + + _getter.__name__ = name + _setter.__name__ = name + + return property(fget=_getter, fset=_setter, + doc=getattr(delegate, name).__doc__) + + def _create_delegator_method(name): + + def f(self, *args, **kwargs): + return self._delegate_method(name, *args, **kwargs) + + f.__name__ = name + f.__doc__ = getattr(delegate, name).__doc__ + + return f + + for name in accessors: + + if typ == 'property': + f = _create_delegator_property(name) + else: + f = _create_delegator_method(name) + + # don't overwrite existing methods/properties + if overwrite or not hasattr(cls, name): + setattr(cls, name, f) + + +class AccessorProperty(object): + """Descriptor for implementing accessor properties like Series.str + """ + + def __init__(self, accessor_cls, construct_accessor): + self.accessor_cls = accessor_cls + self.construct_accessor = construct_accessor + self.__doc__ = accessor_cls.__doc__ + + def __get__(self, instance, owner=None): + if instance is None: + # this ensures that Series.str. 
is well defined + return self.accessor_cls + return self.construct_accessor(instance) + + def __set__(self, instance, value): + raise AttributeError("can't set attribute") + + def __delete__(self, instance): + raise AttributeError("can't delete attribute") diff --git a/pandas/core/base.py b/pandas/core/base.py index 97c4c8626dcbb..43aa8ce5d36c5 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -162,91 +162,8 @@ def __setattr__(self, key, value): object.__setattr__(self, key, value) -class PandasDelegate(PandasObject): - """ an abstract base class for delegating methods/properties """ - def _delegate_property_get(self, name, *args, **kwargs): - raise TypeError("You cannot access the " - "property {name}".format(name=name)) - def _delegate_property_set(self, name, value, *args, **kwargs): - raise TypeError("The property {name} cannot be set".format(name=name)) - - def _delegate_method(self, name, *args, **kwargs): - raise TypeError("You cannot call method {name}".format(name=name)) - - @classmethod - def _add_delegate_accessors(cls, delegate, accessors, typ, - overwrite=False): - """ - add accessors to cls from the delegate class - - Parameters - ---------- - cls : the class to add the methods/properties to - delegate : the class to get methods/properties & doc-strings - acccessors : string list of accessors to add - typ : 'property' or 'method' - overwrite : boolean, default False - overwrite the method/property in the target class if it exists - """ - - def _create_delegator_property(name): - - def _getter(self): - return self._delegate_property_get(name) - - def _setter(self, new_values): - return self._delegate_property_set(name, new_values) - - _getter.__name__ = name - _setter.__name__ = name - - return property(fget=_getter, fset=_setter, - doc=getattr(delegate, name).__doc__) - - def _create_delegator_method(name): - - def f(self, *args, **kwargs): - return self._delegate_method(name, *args, **kwargs) - - f.__name__ = name - f.__doc__ = 
getattr(delegate, name).__doc__ - - return f - - for name in accessors: - - if typ == 'property': - f = _create_delegator_property(name) - else: - f = _create_delegator_method(name) - - # don't overwrite existing methods/properties - if overwrite or not hasattr(cls, name): - setattr(cls, name, f) - - -class AccessorProperty(object): - """Descriptor for implementing accessor properties like Series.str - """ - - def __init__(self, accessor_cls, construct_accessor): - self.accessor_cls = accessor_cls - self.construct_accessor = construct_accessor - self.__doc__ = accessor_cls.__doc__ - - def __get__(self, instance, owner=None): - if instance is None: - # this ensures that Series.str. is well defined - return self.accessor_cls - return self.construct_accessor(instance) - - def __set__(self, instance, value): - raise AttributeError("can't set attribute") - - def __delete__(self, instance): - raise AttributeError("can't delete attribute") class GroupByError(Exception): diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index afae11163b0dc..7fce3e9444d1a 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -29,7 +29,8 @@ from pandas.core.common import is_null_slice from pandas.core.algorithms import factorize, take_1d, unique1d -from pandas.core.base import (PandasObject, PandasDelegate, +from pandas.core.accessors import PandasDelegate +from pandas.core.base import (PandasObject, NoNewAttributesMixin, _shared_docs) import pandas.core.common as com from pandas.core.missing import interpolate_2d diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9a79ca1d4eab1..fb03535f34083 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -88,10 +88,9 @@ from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex -import pandas.core.base as base +from pandas.core import accessors, base, nanops, ops import pandas.core.common as com -import pandas.core.nanops as nanops 
-import pandas.core.ops as ops + import pandas.io.formats.format as fmt import pandas.io.formats.console as console from pandas.io.formats.printing import pprint_thing @@ -6006,7 +6005,7 @@ def _put_str(s, space): # ---------------------------------------------------------------------- # Add plotting methods to DataFrame -DataFrame.plot = base.AccessorProperty(gfx.FramePlotMethods, +DataFrame.plot = accessors.AccessorProperty(gfx.FramePlotMethods, gfx.FramePlotMethods) DataFrame.hist = gfx.hist_frame diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index f1fb9a8ad93a7..78002867c3cf0 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -11,7 +11,8 @@ is_timedelta64_dtype, is_categorical_dtype, is_list_like) -from pandas.core.base import PandasDelegate, NoNewAttributesMixin +from pandas.core.accessors import PandasDelegate +from pandas.core.base import NoNewAttributesMixin from pandas.core.indexes.datetimes import DatetimeIndex from pandas._libs.period import IncompatibleFrequency # noqa from pandas.core.indexes.period import PeriodIndex diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index e8427f847dd2d..d483924236c41 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -19,15 +19,15 @@ from pandas.util._decorators import Appender, cache_readonly from pandas.core.config import get_option from pandas.core.indexes.base import Index, _index_shared_docs -import pandas.core.base as base -import pandas.core.missing as missing + +from pandas.core import base, accessors, missing import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) -class CategoricalIndex(Index, base.PandasDelegate): +class CategoricalIndex(Index, accessors.PandasDelegate): """ Immutable Index implementing an ordered, sliceable set. 
CategoricalIndex diff --git a/pandas/core/series.py b/pandas/core/series.py index 219eca4277f32..24f7add7e0921 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -50,7 +50,9 @@ from pandas.core.index import (Index, MultiIndex, InvalidIndexError, Float64Index, _ensure_index) from pandas.core.indexing import check_bool_indexer, maybe_convert_indices -from pandas.core import generic, base + +from pandas.core import generic, base, accessors + from pandas.core.internals import SingleBlockManager from pandas.core.categorical import Categorical, CategoricalAccessor import pandas.core.strings as strings @@ -2891,7 +2893,7 @@ def _make_dt_accessor(self): raise AttributeError("Can only use .dt accessor with datetimelike " "values") - dt = base.AccessorProperty(CombinedDatetimelikeProperties, + dt = accessors.AccessorProperty(CombinedDatetimelikeProperties, _make_dt_accessor) # ------------------------------------------------------------------------- @@ -2903,7 +2905,7 @@ def _make_cat_accessor(self): "'category' dtype") return CategoricalAccessor(self.values, self.index) - cat = base.AccessorProperty(CategoricalAccessor, _make_cat_accessor) + cat = accessors.AccessorProperty(CategoricalAccessor, _make_cat_accessor) def _dir_deletions(self): return self._accessors @@ -3103,7 +3105,7 @@ def create_from_value(value, index, dtype): import pandas.plotting._core as _gfx # noqa -Series.plot = base.AccessorProperty(_gfx.SeriesPlotMethods, +Series.plot = accessors.AccessorProperty(_gfx.SeriesPlotMethods, _gfx.SeriesPlotMethods) Series.hist = _gfx.hist_series diff --git a/pandas/core/strings.py b/pandas/core/strings.py index cd7e313b13f1e..a49b558309bee 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -16,7 +16,8 @@ from pandas.core.algorithms import take_1d import pandas.compat as compat -from pandas.core.base import AccessorProperty, NoNewAttributesMixin +from pandas.core.accessors import AccessorProperty +from pandas.core.base import 
NoNewAttributesMixin from pandas.util._decorators import Appender import re import pandas._libs.lib as lib diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 85976b9fabd66..56c316312b745 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -17,7 +17,8 @@ Timedelta, IntervalIndex, Interval) from pandas.compat import StringIO from pandas.compat.numpy import np_array_datetime64_compat -from pandas.core.base import PandasDelegate, NoNewAttributesMixin +from pandas.core.accessors import PandasDelegate +from pandas.core.base import NoNewAttributesMixin from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas._libs.tslib import iNaT From dbc149df312f660c9cef2035b7a95c10d8f77f8f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 19 Jul 2017 23:21:07 -0700 Subject: [PATCH 02/15] Move apply _shared_docs to functions and attach to methods with copy Make str_normalize as func; rename copy--> copy_doc to avoid stdlib name --- pandas/core/strings.py | 157 ++++++++++++++++++++++------------------- 1 file changed, 83 insertions(+), 74 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index a49b558309bee..42ac8fe343b56 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -939,7 +939,33 @@ def str_find(arr, sub, start=0, end=None, side='left'): return _na_map(f, arr, dtype=int) +_shared_docs['index'] = textwrap.dedent(""" + Return %(side)s indexes in each strings where the substring is + fully contained between [start:end]. This is the same as + ``str.%(similar)s`` except instead of returning -1, it raises a ValueError + when the substring is not found. Equivalent to standard ``str.%(method)s``. 
+ Parameters + ---------- + sub : str + Substring being searched + start : int + Left edge index + end : int + Right edge index + + Returns + ------- + found : Series/Index of objects + + See Also + -------- + %(also)s + """) + +@Appender(_shared_docs['index'] % + dict(side='lowest', similar='find', method='index', + also='rindex : Return highest indexes in each strings')) def str_index(arr, sub, start=0, end=None, side='left'): if not isinstance(sub, compat.string_types): msg = 'expected a string object, not {0}' @@ -1125,6 +1151,18 @@ def f(x): return _na_map(f, arr) +_shared_docs['str_strip'] = textwrap.dedent(""" + Strip whitespace (including newlines) from each string in the + Series/Index from %(side)s. Equivalent to :meth:`str.%(method)s`. + + Returns + ------- + stripped : Series/Index of objects + """) + + +@Appender(_shared_docs['str_strip'] % dict(side='left and right sides', + method='strip')) def str_strip(arr, to_strip=None, side='both'): """ Strip whitespace (including newlines) from each string in the @@ -1317,6 +1355,27 @@ def str_encode(arr, encoding, errors="strict"): return _na_map(f, arr) +def str_normalize(arr, form): + """ + Return the Unicode normal form for the strings in the Series/Index. + For more information on the forms, see the + :func:`unicodedata.normalize`. 
+ + Parameters + ---------- + form : {'NFC', 'NFKC', 'NFD', 'NFKD'} + Unicode form + + Returns + ------- + normalized : Series/Index of objects + """ + import unicodedata + f = lambda x: unicodedata.normalize(form, compat.u_safe(x)) + result = _na_map(f, arr) + return result + + def _noarg_wrapper(f, docstring=None, **kargs): def wrapper(self): result = _na_map(f, self._data, **kargs) @@ -1353,7 +1412,7 @@ def wrapper3(self, pat, na=np.nan): return wrapper -def copy(source): +def copy_doc(source): "Copy a docstring from another source function (if present)" def do_copy(target): @@ -1466,18 +1525,18 @@ def cons_row(x): cons = self._orig._constructor return cons(result, name=name, index=index) - @copy(str_cat) + @copy_doc(str_cat) def cat(self, others=None, sep=None, na_rep=None): data = self._orig if self._is_categorical else self._data result = str_cat(data, others=others, sep=sep, na_rep=na_rep) return self._wrap_result(result, use_codes=(not self._is_categorical)) - @copy(str_split) + @copy_doc(str_split) def split(self, pat=None, n=-1, expand=False): result = str_split(self._data, pat, n=n) return self._wrap_result(result, expand=expand) - @copy(str_rsplit) + @copy_doc(str_rsplit) def rsplit(self, pat=None, n=-1, expand=False): result = str_rsplit(self._data, pat, n=n) return self._wrap_result(result, expand=expand) @@ -1548,40 +1607,40 @@ def rpartition(self, pat=' ', expand=True): result = _na_map(f, self._data) return self._wrap_result(result, expand=expand) - @copy(str_get) + @copy_doc(str_get) def get(self, i): result = str_get(self._data, i) return self._wrap_result(result) - @copy(str_join) + @copy_doc(str_join) def join(self, sep): result = str_join(self._data, sep) return self._wrap_result(result) - @copy(str_contains) + @copy_doc(str_contains) def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): result = str_contains(self._data, pat, case=case, flags=flags, na=na, regex=regex) return self._wrap_result(result) - @copy(str_match) + 
@copy_doc(str_match) def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=None): result = str_match(self._data, pat, case=case, flags=flags, na=na, as_indexer=as_indexer) return self._wrap_result(result) - @copy(str_replace) + @copy_doc(str_replace) def replace(self, pat, repl, n=-1, case=None, flags=0): result = str_replace(self._data, pat, repl, n=n, case=case, flags=flags) return self._wrap_result(result) - @copy(str_repeat) + @copy_doc(str_repeat) def repeat(self, repeats): result = str_repeat(self._data, repeats) return self._wrap_result(result) - @copy(str_pad) + @copy_doc(str_pad) def pad(self, width, side='left', fillchar=' '): result = str_pad(self._data, width, side=side, fillchar=fillchar) return self._wrap_result(result) @@ -1634,37 +1693,27 @@ def zfill(self, width): result = str_pad(self._data, width, side='left', fillchar='0') return self._wrap_result(result) - @copy(str_slice) + @copy_doc(str_slice) def slice(self, start=None, stop=None, step=None): result = str_slice(self._data, start, stop, step) return self._wrap_result(result) - @copy(str_slice_replace) + @copy_doc(str_slice_replace) def slice_replace(self, start=None, stop=None, repl=None): result = str_slice_replace(self._data, start, stop, repl) return self._wrap_result(result) - @copy(str_decode) + @copy_doc(str_decode) def decode(self, encoding, errors="strict"): result = str_decode(self._data, encoding, errors) return self._wrap_result(result) - @copy(str_encode) + @copy_doc(str_encode) def encode(self, encoding, errors="strict"): result = str_encode(self._data, encoding, errors) return self._wrap_result(result) - _shared_docs['str_strip'] = (""" - Strip whitespace (including newlines) from each string in the - Series/Index from %(side)s. Equivalent to :meth:`str.%(method)s`. 
- - Returns - ------- - stripped : Series/Index of objects - """) - - @Appender(_shared_docs['str_strip'] % dict(side='left and right sides', - method='strip')) + @copy_doc(str_strip) def strip(self, to_strip=None): result = str_strip(self._data, to_strip, side='both') return self._wrap_result(result) @@ -1681,12 +1730,12 @@ def rstrip(self, to_strip=None): result = str_strip(self._data, to_strip, side='right') return self._wrap_result(result) - @copy(str_wrap) + @copy_doc(str_wrap) def wrap(self, width, **kwargs): result = str_wrap(self._data, width, **kwargs) return self._wrap_result(result) - @copy(str_get_dummies) + @copy_doc(str_get_dummies) def get_dummies(self, sep='|'): # we need to cast to Series of strings as only that has all # methods available for making the dummies... @@ -1695,7 +1744,7 @@ def get_dummies(self, sep='|'): return self._wrap_result(result, use_codes=(not self._is_categorical), name=name, expand=True) - @copy(str_translate) + @copy_doc(str_translate) def translate(self, table, deletechars=None): result = str_translate(self._data, table, deletechars) return self._wrap_result(result) @@ -1705,11 +1754,11 @@ def translate(self, table, deletechars=None): endswith = _pat_wrapper(str_endswith, na=True) findall = _pat_wrapper(str_findall, flags=True) - @copy(str_extract) + @copy_doc(str_extract) def extract(self, pat, flags=0, expand=None): return str_extract(self, pat, flags=flags, expand=expand) - @copy(str_extractall) + @copy_doc(str_extractall) def extractall(self, pat, flags=0): return str_extractall(self._orig, pat, flags=flags) @@ -1750,52 +1799,12 @@ def rfind(self, sub, start=0, end=None): result = str_find(self._data, sub, start=start, end=end, side='right') return self._wrap_result(result) + @copy_doc(str_normalize) def normalize(self, form): - """Return the Unicode normal form for the strings in the Series/Index. - For more information on the forms, see the - :func:`unicodedata.normalize`. 
- - Parameters - ---------- - form : {'NFC', 'NFKC', 'NFD', 'NFKD'} - Unicode form - - Returns - ------- - normalized : Series/Index of objects - """ - import unicodedata - f = lambda x: unicodedata.normalize(form, compat.u_safe(x)) - result = _na_map(f, self._data) + result = str_normalize(self._data, form) return self._wrap_result(result) - _shared_docs['index'] = (""" - Return %(side)s indexes in each strings where the substring is - fully contained between [start:end]. This is the same as - ``str.%(similar)s`` except instead of returning -1, it raises a ValueError - when the substring is not found. Equivalent to standard ``str.%(method)s``. - - Parameters - ---------- - sub : str - Substring being searched - start : int - Left edge index - end : int - Right edge index - - Returns - ------- - found : Series/Index of objects - - See Also - -------- - %(also)s - """) - - @Appender(_shared_docs['index'] % - dict(side='lowest', similar='find', method='index', - also='rindex : Return highest indexes in each strings')) + @copy_doc(str_index) def index(self, sub, start=0, end=None): result = str_index(self._data, sub, start=start, end=end, side='left') return self._wrap_result(result) From 3c77d941f2514d5b35db8538be713a2aef0f7d6f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 19 Jul 2017 23:28:32 -0700 Subject: [PATCH 03/15] Implement _make_accessor as classmethod on StringMethods Expand some inline if/else blocks --- pandas/core/strings.py | 60 +++++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 42ac8fe343b56..977868334ca58 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -16,6 +16,8 @@ from pandas.core.algorithms import take_1d import pandas.compat as compat +from pandas.core import accessors + from pandas.core.accessors import AccessorProperty from pandas.core.base import NoNewAttributesMixin from pandas.util._decorators import 
Appender @@ -1437,7 +1439,11 @@ class StringMethods(NoNewAttributesMixin): def __init__(self, data): self._is_categorical = is_categorical_dtype(data) - self._data = data.cat.categories if self._is_categorical else data + if self._is_categorical: + self._data = data.cat.categories + else: + self._data = data + # save orig to blow up categoricals to the right type self._orig = data self._freeze() @@ -1456,8 +1462,7 @@ def __iter__(self): i += 1 g = self.get(i) - def _wrap_result(self, result, use_codes=True, - name=None, expand=None): + def _wrap_result(self, result, use_codes=True, name=None, expand=None): from pandas.core.index import Index, MultiIndex @@ -1475,7 +1480,7 @@ def _wrap_result(self, result, use_codes=True, if expand is None: # infer from ndim if expand is not specified - expand = False if result.ndim == 1 else True + expand = result.ndim != 1 elif expand is True and not isinstance(self._orig, Index): # required when expand=True is explicitly specified @@ -1527,7 +1532,10 @@ def cons_row(x): @copy_doc(str_cat) def cat(self, others=None, sep=None, na_rep=None): - data = self._orig if self._is_categorical else self._data + if self._is_categorical: + data = self._orig + else: + data = self._data result = str_cat(data, others=others, sep=sep, na_rep=na_rep) return self._wrap_result(result, use_codes=(not self._is_categorical)) @@ -1739,7 +1747,10 @@ def wrap(self, width, **kwargs): def get_dummies(self, sep='|'): # we need to cast to Series of strings as only that has all # methods available for making the dummies... 
- data = self._orig.astype(str) if self._is_categorical else self._data + if self._is_categorical: + data = self._orig.astype(str) + else: + data = self._data result, name = str_get_dummies(data, sep) return self._wrap_result(result, use_codes=(not self._is_categorical), name=name, expand=True) @@ -1900,18 +1911,14 @@ def rindex(self, sub, start=0, end=None): docstring=_shared_docs['ismethods'] % _shared_docs['isdecimal']) - -class StringAccessorMixin(object): - """ Mixin to add a `.str` acessor to the class.""" - - # string methods - def _make_str_accessor(self): + @classmethod + def _make_accessor(cls, data): from pandas.core.index import Index - if (isinstance(self, ABCSeries) and - not ((is_categorical_dtype(self.dtype) and - is_object_dtype(self.values.categories)) or - (is_object_dtype(self.dtype)))): + if (isinstance(data, ABCSeries) and + not ((is_categorical_dtype(data.dtype) and + is_object_dtype(data.values.categories)) or + (is_object_dtype(data.dtype)))): # it's neither a string series not a categorical series with # strings inside the categories. # this really should exclude all series with any non-string values @@ -1920,23 +1927,34 @@ def _make_str_accessor(self): raise AttributeError("Can only use .str accessor with string " "values, which use np.object_ dtype in " "pandas") - elif isinstance(self, Index): + elif isinstance(data, Index): # can't use ABCIndex to exclude non-str # see scc/inferrence.pyx which can contain string values allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer') - if self.inferred_type not in allowed_types: + if data.inferred_type not in allowed_types: message = ("Can only use .str accessor with string values " "(i.e. 
inferred_type is 'string', 'unicode' or " "'mixed')") raise AttributeError(message) - if self.nlevels > 1: + if data.nlevels > 1: message = ("Can only use .str accessor with Index, not " "MultiIndex") raise AttributeError(message) - return StringMethods(self) + return StringAccessor(data) + +StringAccessor = StringMethods # Alias to mirror CategoricalAccessor + + +# TODO: This is only mixed in to Index (this PR takes it out of Series) +# and the _dir_additions/_dir_deletions won't play nicely with +# any other class this gets mixed into that *does* implement its own +# _dir_additions/_dir_deletions. This should be deprecated. +class StringAccessorMixin(object): + """ Mixin to add a `.str` acessor to the class.""" + - str = AccessorProperty(StringMethods, _make_str_accessor) + str = accessors.AccessorProperty(StringAccessor) def _dir_additions(self): return set() From 19f7ff623be2e185822981938d9075e281afb8b5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 19 Jul 2017 23:31:35 -0700 Subject: [PATCH 04/15] Add example/recipe --- pandas/core/accessors.py | 356 +++++++++++++++++++++++++++++++++------ 1 file changed, 308 insertions(+), 48 deletions(-) diff --git a/pandas/core/accessors.py b/pandas/core/accessors.py index ed09ac51d1853..922ec61084cee 100644 --- a/pandas/core/accessors.py +++ b/pandas/core/accessors.py @@ -1,80 +1,198 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +""" -from pandas.core.base import PandasObject +An example/recipe for creating a custom accessor. -class PandasDelegate(PandasObject): - """ an abstract base class for delegating methods/properties """ +The primary use case for accessors is when a Series contains instances +of a particular class and we want to access properties/methods of these +instances in Series form. 
- def _delegate_property_get(self, name, *args, **kwargs): - raise TypeError("You cannot access the " - "property {name}".format(name=name)) +Suppose we have a custom State class representing US states: - def _delegate_property_set(self, name, value, *args, **kwargs): - raise TypeError("The property {name} cannot be set".format(name=name)) +class State(object): + def __repr__(self): + return repr(self.name) - def _delegate_method(self, name, *args, **kwargs): - raise TypeError("You cannot call method {name}".format(name=name)) + def __init__(self, name): + self.name = name + self._abbrev_dict = {'California': 'CA', 'Alabama': 'AL'} + + @property + def abbrev(self): + return self._abbrev_dict[self.name] + + @abbrev.setter + def abbrev(self, value): + self._abbrev_dict[self.name] = value + + def fips(self): + return {'California': 6, 'Alabama': 1}[self.name] + + +We can construct a series of these objects: + +>>> ser = pd.Series([State('Alabama'), State('California')]) +>>> ser +0 'Alabama' +1 'California' +dtype: object + +We would like direct access to the `abbrev` property and `fips` method. +One option is to access these manually with `apply`: + +>>> ser.apply(lambda x: x.fips()) +0 1 +1 6 +dtype: int64 + +But doing that repeatedly gets old in a hurry, so we decide to make a +custom accessor. This entails subclassing `PandasDelegate` to specify +what should be accessed and how. + +There are four methods that *may* be defined in this subclass, one of which +*must* be defined. The mandatory method is a classmethod called +`_make_accessor`. `_make_accessor` is responsible doing any validation on +inputs for the accessor. In this case, the inputs must be a Series +containing State objects. 
+ + +class StateDelegate(PandasDelegate): + + def __init__(self, values): + self.values = values @classmethod - def _add_delegate_accessors(cls, delegate, accessors, typ, - overwrite=False): - """ - add accessors to cls from the delegate class - - Parameters - ---------- - cls : the class to add the methods/properties to - delegate : the class to get methods/properties & doc-strings - acccessors : string list of accessors to add - typ : 'property' or 'method' - overwrite : boolean, default False - overwrite the method/property in the target class if it exists - """ + def _make_accessor(cls, data): + if not isinstance(data, pd.Series): + raise ValueError('Input must be a Series of States') + elif not data.apply(lambda x: isinstance(x, State)).all(): + raise ValueError('All entries must be State objects') + return StateDelegate(data) + + +With `_make_accessor` defined, we have enough to create the accessor, but +not enough to actually do anything useful with it. In order to access +*methods* of State objects, we implement `_delegate_method`. `_delegate_method` +calls the underlying method for each object in the series and wraps these +in a new Series. The simplest version looks like: + + def _delegate_method(self, name, *args, **kwargs): + state_method = lambda x: getattr(x, name)(*args, **kwargs) + return self.values.apply(state_method) + +Similarly in order to access *properties* of State objects, we need to +implement `_delegate_property_get`: + + def _delegate_property_get(self, name): + state_property = lambda x: getattr(x, name) + return self.values.apply(state_property) + + +On ocassion, we may want to be able to *set* property being accessed. +This is discouraged, but allowed (as long as the class being accessed +allows the property to be set). 
Doing so requires implementing +`_delegate_property_set`: + + def _delegate_property_set(self, name, new_values): + for (obj, val) in zip(self.values, new_values): + setattr(obj, name, val) + + +With these implemented, `StateDelegate` knows how to handle methods and +properties. We just need to tell it what names and properties it is +supposed to handle. This is done by decorating the `StateDelegate` +class with `pd.accessors.wrap_delegate_names`. We apply the decorator +once with a list of all the methods the accessor should recognize and +once with a list of all the properties the accessor should recognize. + + +@wrap_delegate_names(delegate=State, + accessors=["fips"], + typ="method") +@wrap_delegate_names(delegate=State, + accessors=["abbrev"], + typ="property") +class StateDelegate(PandasDelegate): + [...] + + +We can now pin the `state` accessor to the pd.Series class (we could +alternatively pin it to the pd.Index class with a slightly different +implementation above): + +pd.Series.state = accessors.AccessorProperty(StateDelegate) + + +>>> ser = pd.Series([State('Alabama'), State('California')]) +>>> isinstance(ser.state, StateDelegate) +True + +>>> ser.state.abbrev +0 AL +1 CA +dtype: object + +>>> ser.state.fips() +0 1 +1 6 + +>>> ser.state.abbrev = ['Foo', 'Bar'] +>>> ser.state.abbrev +0 Foo +1 Bar +dtype: object + - def _create_delegator_property(name): - def _getter(self): - return self._delegate_property_get(name) +""" +from pandas.core.base import PandasObject +from pandas.core import common as com - def _setter(self, new_values): - return self._delegate_property_set(name, new_values) +class PandasDelegate(PandasObject): + """ an abstract base class for delegating methods/properties - _getter.__name__ = name - _setter.__name__ = name + Usage: To make a custom accessor, start by subclassing `Delegate`. + See example in the module-level docstring. 
- return property(fget=_getter, fset=_setter, - doc=getattr(delegate, name).__doc__) + """ - def _create_delegator_method(name): + def __init__(self, values): + self.values = values + # #self._freeze() - def f(self, *args, **kwargs): - return self._delegate_method(name, *args, **kwargs) + @classmethod + def _make_accessor(cls, data): # pragma: no cover + raise NotImplementedError('It is up to subclasses to implement ' + '_make_accessor. This does input validation on the object to ' + 'which the accessor is being pinned. ' + 'It should return an instance of `cls`.') - f.__name__ = name - f.__doc__ = getattr(delegate, name).__doc__ - return f + def _delegate_property_get(self, name, *args, **kwargs): + raise TypeError("You cannot access the " + "property {name}".format(name=name)) - for name in accessors: + def _delegate_property_set(self, name, value, *args, **kwargs): + raise TypeError("The property {name} cannot be set".format(name=name)) - if typ == 'property': - f = _create_delegator_property(name) - else: - f = _create_delegator_method(name) + def _delegate_method(self, name, *args, **kwargs): + raise TypeError("You cannot call method {name}".format(name=name)) - # don't overwrite existing methods/properties - if overwrite or not hasattr(cls, name): - setattr(cls, name, f) class AccessorProperty(object): """Descriptor for implementing accessor properties like Series.str """ - def __init__(self, accessor_cls, construct_accessor): + def __init__(self, accessor_cls, construct_accessor=None): self.accessor_cls = accessor_cls + + if construct_accessor is None: + # accessor_cls._make_accessor must be a classmethod + construct_accessor = accessor_cls._make_accessor + self.construct_accessor = construct_accessor self.__doc__ = accessor_cls.__doc__ @@ -89,3 +207,145 @@ def __set__(self, instance, value): def __delete__(self, instance): raise AttributeError("can't delete attribute") + + +class Delegator(object): + """ Delegator class contains methods that are used by 
PandasDelegate + and Accessor subclasses, but that do not ultimately belong in + the namespaces of user-facing classes. + + Many of these methods *could* be module-level functions, but are + retained as staticmethods for organization purposes. + """ + + @staticmethod + def create_delegator_property(name, delegate): + # Note: we really only need the `delegate` here for the docstring + + def _getter(self): + return self._delegate_property_get(name) + + def _setter(self, new_values): + return self._delegate_property_set(name, new_values) + # TODO: not hit in tests; not sure this is something we + # really want anyway + + _getter.__name__ = name + _setter.__name__ = name + _doc = getattr(delegate, name).__doc__ + return property(fget=_getter, fset=_setter, doc=_doc) + + + @staticmethod + def create_delegator_method(name, delegate): + # Note: we really only need the `delegate` here for the docstring + + def func(self, *args, **kwargs): + return self._delegate_method(name, *args, **kwargs) + + if callable(name): + # A function/method was passed directly instead of a name + # This may also render the `delegate` arg unnecessary. + func.__name__ = name.__name__ # TODO: is this generally valid? + func.__doc__ = name.__doc__ + else: + func.__name__ = name + func.__doc__ = getattr(delegate, name).__doc__ + return func + + + @staticmethod + def delegate_names(delegate, accessors, typ, overwrite=False): + """ + delegate_names decorates class definitions, e.g: + + @delegate_names(Categorical, ["categories", "ordered"], "property") + class CategoricalAccessor(PandasDelegate): + + @classmethod + def _make_accessor(cls, data): + [...] + + + This replaces the older usage in which following a class definition + we would use `Foo._add_delegate_accessors(...)`. The motivation + is that we would like to keep as much of a class's internals inside + the class definition. 
For things that we cannot keep directly + in the class definition, a decorator is more directly tied to + the definition than a method call outside the definition. + + """ + # Note: we really only need the `delegate` here for the docstring + + def add_delegate_accessors(cls): + """ + add accessors to cls from the delegate class + + Parameters + ---------- + cls : the class to add the methods/properties to + delegate : the class to get methods/properties & doc-strings + accessors : string list of accessors to add + typ : 'property' or 'method' + overwrite : boolean, default False + overwrite the method/property in the target class if it exists + """ + for name in accessors: + if typ == "property": + func = Delegator.create_delegator_property(name, delegate) + else: + func = Delegator.create_delegator_method(name, delegate) + + # Allow for a callable to be passed instead of a name. + title = com._get_callable_name(name) + title = title or name + # don't overwrite existing methods/properties unless + # specifically told to do so + if overwrite or not hasattr(cls, title): + setattr(cls, title, func) + + return cls + + return add_delegate_accessors + + + +wrap_delegate_names = Delegator.delegate_names +# TODO: the `delegate` arg to `wrap_delegate_names` is really only relevant +# for a docstring. It'd be nice if we didn't require it and could duck-type +# instead. + +# TODO: There are 2-3 implementations of `_delegate_method` +# and `_delegate_property` that are common enough that we should consider +# making them the defaults. 
First, if the series being accessed has `name` +# method/property: +# +# def _delegate_method(self, name, *args, **kwargs): +# result = getattr(self.values, name)(*args, **kwargs) +# return result +# +# def _delegate_property_get(self, name): +# result = getattr(self.values, name) +# return result +# +# +# Alternately if the series being accessed does not have this attribute, +# but is a series of objects that do have the attribute: +# +# def _delegate_method(self, name, *args, **kwargs): +# meth = lambda x: getattr(x, name)(*args, **kwargs) +# return self.values.apply(meth) +# +# def _delegate_property_get(self, name): +# prop = lambda x: getattr(x, name) +# return self.values.apply(prop) +# +# +# `apply` would need to be changed to `map` if self.values is an Index. +# +# The third thing to consider moving into the general case is +# core.strings.StringMethods._wrap_result, which handles a bunch of cases +# for how to wrap delegated outputs. + + + From d152421114f6510e85dfa2e22e1cda4b3aa79096 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 19 Jul 2017 23:31:49 -0700 Subject: [PATCH 05/15] Test to go along with example/recipe Rename strings classes in tests Update imports in tests --- pandas/tests/test_accessors.py | 100 +++++++++++++++++++++++++++++++ pandas/tests/test_base.py | 25 ++++---- pandas/tests/test_categorical.py | 8 +-- pandas/tests/test_strings.py | 12 ++-- 4 files changed, 122 insertions(+), 23 deletions(-) create mode 100644 pandas/tests/test_accessors.py diff --git a/pandas/tests/test_accessors.py b/pandas/tests/test_accessors.py new file mode 100644 index 0000000000000..e6f2ac10824b2 --- /dev/null +++ b/pandas/tests/test_accessors.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + +An example/recipe/test for implementing custom accessors. 
+ +""" + +import pandas as pd + +from pandas.core.accessors import (wrap_delegate_names, + PandasDelegate, AccessorProperty) + +class State(object): + def __repr__(self): + return repr(self.name) + + def __init__(self, name): + self.name = name + self._abbrev_dict = {'California': 'CA', 'Alabama': 'AL'} + + @property + def abbrev(self): + return self._abbrev_dict[self.name] + + @abbrev.setter + def abbrev(self, value): + self._abbrev_dict[self.name] = value + + def fips(self): + return {'California': 6, 'Alabama': 1}[self.name] + + + +@wrap_delegate_names(delegate=State, + accessors=["fips"], + typ="method") +@wrap_delegate_names(delegate=State, + accessors=["abbrev"], + typ="property") +class StateDelegate(PandasDelegate): + + def __init__(self, values): + self.values = values + #self._freeze() + + @classmethod + def _make_accessor(cls, data): + """ + When implementing custom accessors, `_make_accessor` is the place + to do validation that the attributes be accessed will actually be + present in the underlying data. + """ + if not isinstance(data, pd.Series): + raise ValueError('Input must be a Series of States') + elif not data.apply(lambda x: isinstance(x, State)).all(): + raise ValueError('All entries must be State objects') + return StateDelegate(data) + + def _delegate_method(self, name, *args, **kwargs): + state_method = lambda x: getattr(x, name)(*args, **kwargs) + return self.values.apply(state_method) + + def _delegate_property_get(self, name): + state_property = lambda x: getattr(x, name) + return self.values.apply(state_property) + + def _delegate_property_set(self, name, new_values): + """ + Setting properties via accessors is permitted but discouraged. 
+ """ + for (obj, val) in zip(self.values, new_values): + setattr(obj, name, val) + + + + + +def test_geo_state_accessor(): + import pandas.util.testing as tm + + pd.Series.state = AccessorProperty(StateDelegate) + + ser = pd.Series([State('Alabama'), State('California')]) + + abbrev = pd.Series(['AL', 'CA']) + tm.assert_series_equal(ser.state.abbrev, abbrev) + + fips = pd.Series([1, 6]) + tm.assert_series_equal(ser.state.fips(), fips) + + + + ser.state.abbrev = ['Foo', 'Bar'] + + new_abbrev = pd.Series(['Foo', 'Bar']) + tm.assert_series_equal(ser.state.abbrev, new_abbrev) + + + diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 56c316312b745..65fd7cd1780f0 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -17,7 +17,8 @@ Timedelta, IntervalIndex, Interval) from pandas.compat import StringIO from pandas.compat.numpy import np_array_datetime64_compat -from pandas.core.accessors import PandasDelegate +from pandas.core import accessors + from pandas.core.base import NoNewAttributesMixin from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas._libs.tslib import iNaT @@ -105,7 +106,7 @@ def bar(self, *args, **kwargs): """ a test bar method """ pass - class Delegate(PandasDelegate): + class Delegate(accessors.PandasDelegate): def __init__(self, obj): self.obj = obj @@ -113,20 +114,18 @@ def __init__(self, obj): def setup_method(self, method): pass - def test_invalida_delgation(self): + def test_invalid_delegation(self): # these show that in order for the delegation to work # the _delegate_* methods need to be overriden to not raise a TypeError - self.Delegate._add_delegate_accessors( - delegate=self.Delegator, - accessors=self.Delegator._properties, - typ='property' - ) - self.Delegate._add_delegate_accessors( - delegate=self.Delegator, - accessors=self.Delegator._methods, - typ='method' - ) + for name in self.Delegator._properties: + func = accessors.Delegator.create_delegator_property(name, 
self.Delegator) + setattr(self.Delegate, name, func) + + for name in self.Delegator._methods: + func = accessors.Delegator.create_delegator_method(name, self.Delegator) + setattr(self.Delegate, name, func) + delegate = self.Delegate(self.Delegator()) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 667b26c24c662..2f84124e0154e 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -4276,11 +4276,11 @@ def test_cat_accessor_no_new_attributes(self): def test_str_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 - from pandas.core.strings import StringMethods + from pandas.core.strings import StringDelegate s = Series(list('aabb')) s = s + " " + s c = s.astype('category') - assert isinstance(c.str, StringMethods) + assert isinstance(c.str, StringDelegate) # str functions, which need special arguments special_func_defs = [ @@ -4352,7 +4352,7 @@ def test_str_accessor_api_for_categorical(self): def test_dt_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 - from pandas.core.indexes.accessors import Properties + from pandas.core.indexes.accessors import BaseDatetimeAccessor s_dr = Series(date_range('1/1/2015', periods=5, tz="MET")) c_dr = s_dr.astype("category") @@ -4372,7 +4372,7 @@ def test_dt_accessor_api_for_categorical(self): ("Period", get_ops(PeriodIndex), s_pr, c_pr), ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr)] - assert isinstance(c_dr.dt, Properties) + assert isinstance(c_dr.dt, BaseDatetimeAccessor) special_func_defs = [ ('strftime', ("%Y-%m-%d",), {}), diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index bb31fb9260160..9d83937c1380f 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -24,8 +24,8 @@ class TestStringMethods(object): def test_api(self): # GH 6106, GH 9322 - assert Series.str is strings.StringMethods - assert isinstance(Series(['']).str, 
strings.StringMethods) + assert Series.str is strings.StringAccessor + assert isinstance(Series(['']).str, strings.StringAccessor) # GH 9184 invalid = Series([1]) @@ -2708,14 +2708,14 @@ def test_index_str_accessor_visibility(self): (['aa', datetime(2011, 1, 1)], 'mixed')] for values, tp in cases: idx = Index(values) - assert isinstance(Series(values).str, StringMethods) - assert isinstance(idx.str, StringMethods) + assert isinstance(Series(values).str, strings.StringAccessor) + assert isinstance(idx.str, strings.StringAccessor) assert idx.inferred_type == tp for values, tp in cases: idx = Index(values) - assert isinstance(Series(values).str, StringMethods) - assert isinstance(idx.str, StringMethods) + assert isinstance(Series(values).str, strings.StringAccessor) + assert isinstance(idx.str, strings.StringAccessor) assert idx.inferred_type == tp cases = [([1, np.nan], 'floating'), From 101e7e5bbf255a1feb6fb0bd6218cf8b0aaa9c4b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 19 Jul 2017 23:33:37 -0700 Subject: [PATCH 06/15] Transition to _make_accessor flake8 fixes Update import name --- pandas/core/accessors.py | 20 +++----- pandas/core/base.py | 4 -- pandas/core/categorical.py | 44 ++++++++++------- pandas/core/frame.py | 4 +- pandas/core/indexes/accessors.py | 82 ++++++++++++++++++-------------- pandas/core/indexes/category.py | 39 ++++++++------- pandas/core/series.py | 77 ++++++++++++------------------ pandas/core/strings.py | 15 ++++-- pandas/tests/test_accessors.py | 55 +++++++++------------ pandas/tests/test_base.py | 9 ++-- pandas/tests/test_categorical.py | 4 +- 11 files changed, 177 insertions(+), 176 deletions(-) diff --git a/pandas/core/accessors.py b/pandas/core/accessors.py index 922ec61084cee..be738f9de961e 100644 --- a/pandas/core/accessors.py +++ b/pandas/core/accessors.py @@ -74,9 +74,9 @@ def _make_accessor(cls, data): With `_make_accessor` defined, we have enough to create the accessor, but not enough to actually do anything useful with 
it. In order to access -*methods* of State objects, we implement `_delegate_method`. `_delegate_method` -calls the underlying method for each object in the series and wraps these -in a new Series. The simplest version looks like: +*methods* of State objects, we implement `_delegate_method`. +`_delegate_method` calls the underlying method for each object in the +series and wraps these in a new Series. The simplest version looks like: def _delegate_method(self, name, *args, **kwargs): state_method = lambda x: getattr(x, name)(*args, **kwargs) @@ -150,6 +150,7 @@ class StateDelegate(PandasDelegate): from pandas.core.base import PandasObject from pandas.core import common as com + class PandasDelegate(PandasObject): """ an abstract base class for delegating methods/properties @@ -164,12 +165,12 @@ def __init__(self, values): @classmethod def _make_accessor(cls, data): # pragma: no cover - raise NotImplementedError('It is up to subclasses to implement ' + raise NotImplementedError( + 'It is up to subclasses to implement ' '_make_accessor. This does input validation on the object to ' 'which the accessor is being pinned. 
' 'It should return an instance of `cls`.') - def _delegate_property_get(self, name, *args, **kwargs): raise TypeError("You cannot access the " "property {name}".format(name=name)) @@ -181,7 +182,6 @@ def _delegate_method(self, name, *args, **kwargs): raise TypeError("You cannot call method {name}".format(name=name)) - class AccessorProperty(object): """Descriptor for implementing accessor properties like Series.str """ @@ -235,7 +235,6 @@ def _setter(self, new_values): _doc = getattr(delegate, name).__doc__ return property(fget=_getter, fset=_setter, doc=_doc) - @staticmethod def create_delegator_method(name, delegate): # Note: we really only need the `delegate` here for the docstring @@ -246,14 +245,13 @@ def func(self, *args, **kwargs): if callable(name): # A function/method was passed directly instead of a name # This may also render the `delegate` arg unnecessary. - func.__name__ = name.__name__ # TODO: is this generally valid? + func.__name__ = name.__name__ # TODO: is this generally valid? func.__doc__ = name.__doc__ else: func.__name__ = name func.__doc__ = getattr(delegate, name).__doc__ return func - @staticmethod def delegate_names(delegate, accessors, typ, overwrite=False): """ @@ -309,7 +307,6 @@ def add_delegate_accessors(cls): return add_delegate_accessors - wrap_delegate_names = Delegator.delegate_names # TODO: the `delegate` arg to `wrap_delegate_names` is really only relevant # for a docstring. It'd be nice if we didn't require it and could duck-type @@ -346,6 +343,3 @@ def add_delegate_accessors(cls): # The third thing to consider moving into the general case is # core.strings.StringMethods._wrap_result, which handles a bunch of cases # for how to wrap delegated outputs. 
- - - diff --git a/pandas/core/base.py b/pandas/core/base.py index 43aa8ce5d36c5..70904eef418c6 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -162,10 +162,6 @@ def __setattr__(self, key, value): object.__setattr__(self, key, value) - - - - class GroupByError(Exception): pass diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 7fce3e9444d1a..617904de3dced 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -29,7 +29,7 @@ from pandas.core.common import is_null_slice from pandas.core.algorithms import factorize, take_1d, unique1d -from pandas.core.accessors import PandasDelegate +from pandas.core import accessors from pandas.core.base import (PandasObject, NoNewAttributesMixin, _shared_docs) import pandas.core.common as com @@ -2014,7 +2014,20 @@ def repeat(self, repeats, *args, **kwargs): # The Series.cat accessor -class CategoricalAccessor(PandasDelegate, NoNewAttributesMixin): +@accessors.wrap_delegate_names(delegate=Categorical, + accessors=["rename_categories", + "reorder_categories", + "add_categories", + "remove_categories", + "remove_unused_categories", + "set_categories", + "as_ordered", + "as_unordered"], + typ="method") +@accessors.wrap_delegate_names(delegate=Categorical, + accessors=["categories", "ordered"], + typ="property") +class CategoricalAccessor(accessors.PandasDelegate, NoNewAttributesMixin): """ Accessor object for categorical properties of the Series values. 
@@ -2037,6 +2050,13 @@ class CategoricalAccessor(PandasDelegate, NoNewAttributesMixin): """ + @classmethod + def _make_accessor(cls, values): + if not is_categorical_dtype(values.dtype): + msg = "Can only use .cat accessor with a 'category' dtype" + raise AttributeError(msg) + return CategoricalAccessor(values.values, values.index) + def __init__(self, values, index): self.categorical = values self.index = index @@ -2048,11 +2068,6 @@ def _delegate_property_get(self, name): def _delegate_property_set(self, name, new_values): return setattr(self.categorical, name, new_values) - @property - def codes(self): - from pandas import Series - return Series(self.categorical.codes, index=self.index) - def _delegate_method(self, name, *args, **kwargs): from pandas import Series method = getattr(self.categorical, name) @@ -2060,19 +2075,16 @@ def _delegate_method(self, name, *args, **kwargs): if res is not None: return Series(res, index=self.index) + # TODO: Can we get this from _delegate_property_get? 
+ # Would need to get self.index into the result + @property + def codes(self): + from pandas import Series + return Series(self.categorical.codes, index=self.index) -CategoricalAccessor._add_delegate_accessors(delegate=Categorical, - accessors=["categories", - "ordered"], - typ='property') -CategoricalAccessor._add_delegate_accessors(delegate=Categorical, accessors=[ - "rename_categories", "reorder_categories", "add_categories", - "remove_categories", "remove_unused_categories", "set_categories", - "as_ordered", "as_unordered"], typ='method') # utility routines - def _get_codes_for_values(values, categories): """ utility routine to turn values into codes given the specified categories diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fb03535f34083..b10728a9a5863 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -88,7 +88,7 @@ from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex -from pandas.core import accessors, base, nanops, ops +from pandas.core import accessors, nanops, ops import pandas.core.common as com import pandas.io.formats.format as fmt @@ -6006,7 +6006,7 @@ def _put_str(s, space): # ---------------------------------------------------------------------- # Add plotting methods to DataFrame DataFrame.plot = accessors.AccessorProperty(gfx.FramePlotMethods, - gfx.FramePlotMethods) + gfx.FramePlotMethods) DataFrame.hist = gfx.hist_frame diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 78002867c3cf0..849573371b9e8 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -11,7 +11,8 @@ is_timedelta64_dtype, is_categorical_dtype, is_list_like) -from pandas.core.accessors import PandasDelegate +from pandas.core import accessors + from pandas.core.base import NoNewAttributesMixin from pandas.core.indexes.datetimes import DatetimeIndex from pandas._libs.period import IncompatibleFrequency # noqa @@ -85,7 
+86,7 @@ def maybe_to_datetimelike(data, copy=False): "datetimelike index".format(type(data))) -class Properties(PandasDelegate, NoNewAttributesMixin): +class BaseDatetimeAccessor(accessors.PandasDelegate, NoNewAttributesMixin): def __init__(self, values, index, name, orig=None): self.values = values @@ -95,7 +96,7 @@ def __init__(self, values, index, name, orig=None): self._freeze() def _delegate_property_get(self, name): - from pandas import Series + from pandas import Series, DataFrame result = getattr(self.values, name) @@ -105,6 +106,9 @@ def _delegate_property_get(self, name): result = result.astype('int64') elif not is_list_like(result): return result + elif isinstance(result, DataFrame): + # e.g. TimedeltaProperties.components + return result.set_index(self.index) result = np.asarray(result) @@ -146,7 +150,18 @@ def _delegate_method(self, name, *args, **kwargs): return result -class DatetimeProperties(Properties): +# An alternative to decorating with @accessors.wrap_delegate_names +# is to define each method individually, e.g.: +# to_period = PandasDelegate._make_delegate_accessor(delegate=DatetimeIndex, +# name='to_period', +# typ='method') +@accessors.wrap_delegate_names(delegate=DatetimeIndex, + accessors=DatetimeIndex._datetimelike_ops, + typ='property') +@accessors.wrap_delegate_names(delegate=DatetimeIndex, + accessors=DatetimeIndex._datetimelike_methods, + typ='method') +class DatetimeProperties(BaseDatetimeAccessor): """ Accessor object for datetimelike properties of the Series values. 
@@ -164,17 +179,13 @@ def to_pydatetime(self): return self.values.to_pydatetime() -DatetimeProperties._add_delegate_accessors( - delegate=DatetimeIndex, - accessors=DatetimeIndex._datetimelike_ops, - typ='property') -DatetimeProperties._add_delegate_accessors( - delegate=DatetimeIndex, - accessors=DatetimeIndex._datetimelike_methods, - typ='method') - - -class TimedeltaProperties(Properties): +@accessors.wrap_delegate_names(delegate=TimedeltaIndex, + accessors=TimedeltaIndex._datetimelike_ops, + typ='property') +@accessors.wrap_delegate_names(delegate=TimedeltaIndex, + accessors=TimedeltaIndex._datetimelike_methods, + typ='method') +class TimedeltaProperties(BaseDatetimeAccessor): """ Accessor object for datetimelike properties of the Series values. @@ -190,6 +201,7 @@ class TimedeltaProperties(Properties): def to_pytimedelta(self): return self.values.to_pytimedelta() + # TODO: Do this with wrap_delegate_names @property def components(self): """ @@ -204,17 +216,13 @@ def components(self): return self.values.components.set_index(self.index) -TimedeltaProperties._add_delegate_accessors( - delegate=TimedeltaIndex, - accessors=TimedeltaIndex._datetimelike_ops, - typ='property') -TimedeltaProperties._add_delegate_accessors( - delegate=TimedeltaIndex, - accessors=TimedeltaIndex._datetimelike_methods, - typ='method') - - -class PeriodProperties(Properties): +@accessors.wrap_delegate_names(delegate=PeriodIndex, + accessors=PeriodIndex._datetimelike_ops, + typ='property') +@accessors.wrap_delegate_names(delegate=PeriodIndex, + accessors=PeriodIndex._datetimelike_methods, + typ='method') +class PeriodProperties(BaseDatetimeAccessor): """ Accessor object for datetimelike properties of the Series values. 
@@ -229,18 +237,20 @@ class PeriodProperties(Properties): """ -PeriodProperties._add_delegate_accessors( - delegate=PeriodIndex, - accessors=PeriodIndex._datetimelike_ops, - typ='property') -PeriodProperties._add_delegate_accessors( - delegate=PeriodIndex, - accessors=PeriodIndex._datetimelike_methods, - typ='method') - - class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): # This class is never instantiated, and exists solely for the benefit of # the Series.dt class property. For Series objects, .dt will always be one # of the more specific classes above. __doc__ = DatetimeProperties.__doc__ + + @classmethod + def _make_accessor(cls, values): + try: + return maybe_to_datetimelike(values) + except Exception: + msg = "Can only use .dt accessor with datetimelike values" + raise AttributeError(msg) + + +DatetimeAccessor = CombinedDatetimelikeProperties +# Alias to mirror CategoricalAccessor diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d483924236c41..bf9551d3d9dd1 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -23,10 +23,25 @@ from pandas.core import base, accessors, missing import pandas.core.indexes.base as ibase +from pandas.core.categorical import Categorical + _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) +@accessors.wrap_delegate_names(delegate=Categorical, + accessors=["rename_categories", + "reorder_categories", + "add_categories", + "remove_categories", + "remove_unused_categories", + "set_categories", + "as_ordered", + "as_unordered", + "min", + "max"], + typ='method', + overwrite=True) class CategoricalIndex(Index, accessors.PandasDelegate): """ @@ -56,6 +71,11 @@ class CategoricalIndex(Index, accessors.PandasDelegate): _engine_type = libindex.Int64Engine _attributes = ['name'] + def __init__(self, *args, **kwargs): + # Override to prevent accessors.PandasDelegate.__init__ from executing 
+ # This is a kludge. + pass + def __new__(cls, data=None, categories=None, ordered=None, dtype=None, copy=False, name=None, fastpath=False, **kwargs): @@ -681,32 +701,15 @@ def _evaluate_compare(self, other): def _delegate_method(self, name, *args, **kwargs): """ method delegation to the ._values """ method = getattr(self._values, name) - if 'inplace' in kwargs: + if kwargs.get('inplace', False): raise ValueError("cannot use inplace with CategoricalIndex") res = method(*args, **kwargs) if is_scalar(res): return res return CategoricalIndex(res, name=self.name) - @classmethod - def _add_accessors(cls): - """ add in Categorical accessor methods """ - - from pandas.core.categorical import Categorical - CategoricalIndex._add_delegate_accessors( - delegate=Categorical, accessors=["rename_categories", - "reorder_categories", - "add_categories", - "remove_categories", - "remove_unused_categories", - "set_categories", - "as_ordered", "as_unordered", - "min", "max"], - typ='method', overwrite=True) - CategoricalIndex._add_numeric_methods_add_sub_disabled() CategoricalIndex._add_numeric_methods_disabled() CategoricalIndex._add_logical_methods_disabled() CategoricalIndex._add_comparison_methods() -CategoricalIndex._add_accessors() diff --git a/pandas/core/series.py b/pandas/core/series.py index 24f7add7e0921..74256663aeed2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -14,6 +14,10 @@ import numpy as np import numpy.ma as ma +from pandas import compat +from pandas.compat import zip, u, OrderedDict, StringIO +from pandas.compat.numpy import function as nv + from pandas.core.dtypes.common import ( is_categorical_dtype, is_bool, @@ -37,6 +41,11 @@ maybe_convert_platform, maybe_cast_to_datetime, maybe_castable) from pandas.core.dtypes.missing import isnull, notnull, remove_na_arraylike + +from pandas.core import (generic, base, accessors, strings, + algorithms, ops, nanops) + +import pandas.core.common as com from pandas.core.common import (is_bool_indexer, 
_default_index, _asarray_tuplesafe, @@ -47,36 +56,25 @@ _maybe_box_datetimelike, _dict_compat, standardize_mapping) -from pandas.core.index import (Index, MultiIndex, InvalidIndexError, - Float64Index, _ensure_index) -from pandas.core.indexing import check_bool_indexer, maybe_convert_indices - -from pandas.core import generic, base, accessors - +from pandas.core.config import get_option from pandas.core.internals import SingleBlockManager from pandas.core.categorical import Categorical, CategoricalAccessor -import pandas.core.strings as strings -from pandas.core.indexes.accessors import ( - maybe_to_datetimelike, CombinedDatetimelikeProperties) + +from pandas.core.indexes.accessors import CombinedDatetimelikeProperties from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexes.period import PeriodIndex -from pandas import compat -from pandas.io.formats.terminal import get_terminal_size -from pandas.compat import zip, u, OrderedDict, StringIO -from pandas.compat.numpy import function as nv - -import pandas.core.ops as ops -import pandas.core.algorithms as algorithms +from pandas.core.index import (Index, MultiIndex, InvalidIndexError, + Float64Index, _ensure_index) +from pandas.core.indexing import check_bool_indexer, maybe_convert_indices -import pandas.core.common as com -import pandas.core.nanops as nanops +from pandas.io.formats.terminal import get_terminal_size import pandas.io.formats.format as fmt + from pandas.util._decorators import Appender, deprecate_kwarg, Substitution from pandas.util._validators import validate_bool_kwarg from pandas._libs import index as libindex, tslib as libts, lib, iNaT -from pandas.core.config import get_option __all__ = ['Series'] @@ -115,8 +113,7 @@ def wrapper(self): # Series class -class Series(base.IndexOpsMixin, strings.StringAccessorMixin, - generic.NDFrame,): +class Series(base.IndexOpsMixin, generic.NDFrame,): """ One-dimensional ndarray with 
axis labels (including time series). @@ -146,9 +143,19 @@ class Series(base.IndexOpsMixin, strings.StringAccessorMixin, Copy input data """ _metadata = ['name'] - _accessors = frozenset(['dt', 'cat', 'str']) _allow_index_ops = True + _accessors = frozenset(['dt', 'cat', 'str']) + + # Datetimelike delegation methods + dt = accessors.AccessorProperty(CombinedDatetimelikeProperties) + + # Categorical methods + cat = accessors.AccessorProperty(CategoricalAccessor) + + # string methods + str = accessors.AccessorProperty(strings.StringAccessor) + def __init__(self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False): @@ -2883,30 +2890,6 @@ def to_period(self, freq=None, copy=True): return self._constructor(new_values, index=new_index).__finalize__(self) - # ------------------------------------------------------------------------- - # Datetimelike delegation methods - - def _make_dt_accessor(self): - try: - return maybe_to_datetimelike(self) - except Exception: - raise AttributeError("Can only use .dt accessor with datetimelike " - "values") - - dt = accessors.AccessorProperty(CombinedDatetimelikeProperties, - _make_dt_accessor) - - # ------------------------------------------------------------------------- - # Categorical methods - - def _make_cat_accessor(self): - if not is_categorical_dtype(self.dtype): - raise AttributeError("Can only use .cat accessor with a " - "'category' dtype") - return CategoricalAccessor(self.values, self.index) - - cat = accessors.AccessorProperty(CategoricalAccessor, _make_cat_accessor) - def _dir_deletions(self): return self._accessors @@ -3106,7 +3089,7 @@ def create_from_value(value, index, dtype): import pandas.plotting._core as _gfx # noqa Series.plot = accessors.AccessorProperty(_gfx.SeriesPlotMethods, - _gfx.SeriesPlotMethods) + _gfx.SeriesPlotMethods) Series.hist = _gfx.hist_series # Add arithmetic! 
diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 977868334ca58..87aec393203f1 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -18,7 +18,6 @@ import pandas.compat as compat from pandas.core import accessors -from pandas.core.accessors import AccessorProperty from pandas.core.base import NoNewAttributesMixin from pandas.util._decorators import Appender import re @@ -941,6 +940,7 @@ def str_find(arr, sub, start=0, end=None, side='left'): return _na_map(f, arr, dtype=int) + _shared_docs['index'] = textwrap.dedent(""" Return %(side)s indexes in each strings where the substring is fully contained between [start:end]. This is the same as @@ -965,6 +965,7 @@ def str_find(arr, sub, start=0, end=None, side='left'): %(also)s """) + @Appender(_shared_docs['index'] % dict(side='lowest', similar='find', method='index', also='rindex : Return highest indexes in each strings')) @@ -1911,6 +1912,14 @@ def rindex(self, sub, start=0, end=None): docstring=_shared_docs['ismethods'] % _shared_docs['isdecimal']) + # TODO: Use this instead of wrapping all of these methods individually? + def _delegate_method(self, name, *args, **kwargs): + # TODO: It would be really nice to keep the signatures + method = getattr(self.values, name) + res = method(*args, **kwargs) + # TODO: Should this get wrapped in an index? 
+ return res + @classmethod def _make_accessor(cls, data): from pandas.core.index import Index @@ -1943,7 +1952,8 @@ def _make_accessor(cls, data): raise AttributeError(message) return StringAccessor(data) -StringAccessor = StringMethods # Alias to mirror CategoricalAccessor + +StringAccessor = StringMethods # Alias to mirror CategoricalAccessor # TODO: This is only mixed in to Index (this PR takes it out of Series) @@ -1953,7 +1963,6 @@ def _make_accessor(cls, data): class StringAccessorMixin(object): """ Mixin to add a `.str` acessor to the class.""" - str = accessors.AccessorProperty(StringAccessor) def _dir_additions(self): diff --git a/pandas/tests/test_accessors.py b/pandas/tests/test_accessors.py index e6f2ac10824b2..562daae19fb35 100644 --- a/pandas/tests/test_accessors.py +++ b/pandas/tests/test_accessors.py @@ -9,7 +9,8 @@ import pandas as pd from pandas.core.accessors import (wrap_delegate_names, - PandasDelegate, AccessorProperty) + PandasDelegate, AccessorProperty) + class State(object): def __repr__(self): @@ -31,7 +32,6 @@ def fips(self): return {'California': 6, 'Alabama': 1}[self.name] - @wrap_delegate_names(delegate=State, accessors=["fips"], typ="method") @@ -39,18 +39,17 @@ def fips(self): accessors=["abbrev"], typ="property") class StateDelegate(PandasDelegate): - + def __init__(self, values): self.values = values - #self._freeze() @classmethod def _make_accessor(cls, data): - """ - When implementing custom accessors, `_make_accessor` is the place - to do validation that the attributes be accessed will actually be - present in the underlying data. - """ + """ + When implementing custom accessors, `_make_accessor` is the place + to do validation that the attributes be accessed will actually be + present in the underlying data. 
+ """ if not isinstance(data, pd.Series): raise ValueError('Input must be a Series of States') elif not data.apply(lambda x: isinstance(x, State)).all(): @@ -66,35 +65,27 @@ def _delegate_property_get(self, name): return self.values.apply(state_property) def _delegate_property_set(self, name, new_values): - """ - Setting properties via accessors is permitted but discouraged. - """ - for (obj, val) in zip(self.values, new_values): - setattr(obj, name, val) - - - + """ + Setting properties via accessors is permitted but discouraged. + """ + for (obj, val) in zip(self.values, new_values): + setattr(obj, name, val) def test_geo_state_accessor(): - import pandas.util.testing as tm - - pd.Series.state = AccessorProperty(StateDelegate) - - ser = pd.Series([State('Alabama'), State('California')]) - - abbrev = pd.Series(['AL', 'CA']) - tm.assert_series_equal(ser.state.abbrev, abbrev) - - fips = pd.Series([1, 6]) - tm.assert_series_equal(ser.state.fips(), fips) - + import pandas.util.testing as tm + pd.Series.state = AccessorProperty(StateDelegate) - ser.state.abbrev = ['Foo', 'Bar'] + ser = pd.Series([State('Alabama'), State('California')]) - new_abbrev = pd.Series(['Foo', 'Bar']) - tm.assert_series_equal(ser.state.abbrev, new_abbrev) + abbrev = pd.Series(['AL', 'CA']) + tm.assert_series_equal(ser.state.abbrev, abbrev) + fips = pd.Series([1, 6]) + tm.assert_series_equal(ser.state.fips(), fips) + ser.state.abbrev = ['Foo', 'Bar'] + new_abbrev = pd.Series(['Foo', 'Bar']) + tm.assert_series_equal(ser.state.abbrev, new_abbrev) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 65fd7cd1780f0..072ab7c6f7689 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -119,14 +119,17 @@ def test_invalid_delegation(self): # the _delegate_* methods need to be overriden to not raise a TypeError for name in self.Delegator._properties: - func = accessors.Delegator.create_delegator_property(name, self.Delegator) + func = 
accessors.Delegator.create_delegator_property(name, + self.Delegator + ) setattr(self.Delegate, name, func) for name in self.Delegator._methods: - func = accessors.Delegator.create_delegator_method(name, self.Delegator) + func = accessors.Delegator.create_delegator_method(name, + self.Delegator + ) setattr(self.Delegate, name, func) - delegate = self.Delegate(self.Delegator()) def f(): diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 2f84124e0154e..c5d0b0f32130c 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -4276,11 +4276,11 @@ def test_cat_accessor_no_new_attributes(self): def test_str_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 - from pandas.core.strings import StringDelegate + from pandas.core.strings import StringAccessor s = Series(list('aabb')) s = s + " " + s c = s.astype('category') - assert isinstance(c.str, StringDelegate) + assert isinstance(c.str, StringAccessor) # str functions, which need special arguments special_func_defs = [ From 74e4539131f5520250a45619647ce1a9854fc3e6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 20 Jul 2017 14:13:35 -0700 Subject: [PATCH 07/15] Remove unused import that was causing a lint error --- pandas/core/strings.py | 2 ++ pandas/tests/test_strings.py | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 87aec393203f1..2a3abe7f5c6b3 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1912,6 +1912,8 @@ def rindex(self, sub, start=0, end=None): docstring=_shared_docs['ismethods'] % _shared_docs['isdecimal']) + # TODO: Should we explicitly subclass PandasDelegate to clarify its role, + # even though it isn't actually needed? # TODO: Use this instead of wrapping all of these methods individually? 
def _delegate_method(self, name, *args, **kwargs): # TODO: It would be really nice to keep the signatures diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 9d83937c1380f..70d21e943ba65 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -2690,8 +2690,6 @@ def test_str_cat_raises_intuitive_error(self): s.str.cat(' ') def test_index_str_accessor_visibility(self): - from pandas.core.strings import StringMethods - if not compat.PY3: cases = [(['a', 'b'], 'string'), (['a', u('b')], 'mixed'), ([u('a'), u('b')], 'unicode'), From 22d4892495809761b50a41d89c3987b12336e6a1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 22 Jul 2017 09:17:19 -0700 Subject: [PATCH 08/15] Wrap long line --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e319ef5c062c3..0081f2f5d212d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5747,7 +5747,8 @@ def isin(self, values): # ---------------------------------------------------------------------- # Add plotting methods to DataFrame - plot = accessors.AccessorProperty(gfx.FramePlotMethods, gfx.FramePlotMethods) + plot = accessors.AccessorProperty(gfx.FramePlotMethods, + gfx.FramePlotMethods) hist = gfx.hist_frame boxplot = gfx.boxplot_frame @@ -6105,4 +6106,3 @@ def _from_nested_dict(data): def _put_str(s, space): return ('%s' % s)[:space].ljust(space) - From 014fae0165ff7b4b8db5d74662776196de7c1db8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 22 Jul 2017 10:38:15 -0700 Subject: [PATCH 09/15] Refactor tests and documentation --- pandas/core/accessors.py | 209 ++++++++------------------------- pandas/tests/test_accessors.py | 90 ++++++++++++-- 2 files changed, 125 insertions(+), 174 deletions(-) diff --git a/pandas/core/accessors.py b/pandas/core/accessors.py index be738f9de961e..103321735ce60 100644 --- a/pandas/core/accessors.py +++ b/pandas/core/accessors.py @@ -1,181 
+1,73 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -""" - -An example/recipe for creating a custom accessor. - - -The primary use case for accessors is when a Series contains instances -of a particular class and we want to access properties/methods of these -instances in Series form. - -Suppose we have a custom State class representing US states: - -class State(object): - def __repr__(self): - return repr(self.name) - - def __init__(self, name): - self.name = name - self._abbrev_dict = {'California': 'CA', 'Alabama': 'AL'} - - @property - def abbrev(self): - return self._abbrev_dict[self.name] - - @abbrev.setter - def abbrev(self, value): - self._abbrev_dict[self.name] = value - - def fips(self): - return {'California': 6, 'Alabama': 1}[self.name] - - -We can construct a series of these objects: - ->>> ser = pd.Series([State('Alabama'), State('California')]) ->>> ser -0 'Alabama' -1 'California' -dtype: object +from pandas.core.base import PandasObject -We would like direct access to the `abbrev` property and `fips` method. -One option is to access these manually with `apply`: ->>> ser.apply(lambda x: x.fips()) -0 1 -1 6 -dtype: int64 +class PandasDelegate(PandasObject): + """ an abstract base class for delegating methods/properties -But doing that repeatedly gets old in a hurry, so we decide to make a -custom accessor. This entails subclassing `PandasDelegate` to specify -what should be accessed and how. + Usage: To make a custom accessor, subclass `PandasDelegate`, overriding + the methods below. Then decorate this subclass with + `accessors.wrap_delegate_names` describing the methods and properties + that should be delegated. -There are four methods that *may* be defined in this subclass, one of which -*must* be defined. The mandatory method is a classmethod called -`_make_accessor`. `_make_accessor` is responsible doing any validation on -inputs for the accessor. In this case, the inputs must be a Series -containing State objects. 
+ Examples can be found in: + pandas.core.accessors.CategoricalAccessor + pandas.core.indexes.accessors (complicated example) + pandas.core.indexes.category.CategoricalIndex + pandas.core.strings.StringMethods + pandas.tests.test_accessors -class StateDelegate(PandasDelegate): + """ def __init__(self, values): + """ + The subclassed constructor will generally only be called by + _make_accessor. See _make_accessor.__doc__. + """ self.values = values @classmethod - def _make_accessor(cls, data): - if not isinstance(data, pd.Series): - raise ValueError('Input must be a Series of States') - elif not data.apply(lambda x: isinstance(x, State)).all(): - raise ValueError('All entries must be State objects') - return StateDelegate(data) - - -With `_make_accessor` defined, we have enough to create the accessor, but -not enough to actually do anything useful with it. In order to access -*methods* of State objects, we implement `_delegate_method`. -`_delegate_method` calls the underlying method for each object in the -series and wraps these in a new Series. The simplest version looks like: - - def _delegate_method(self, name, *args, **kwargs): - state_method = lambda x: getattr(x, name)(*args, **kwargs) - return self.values.apply(state_method) - -Similarly in order to access *properties* of State objects, we need to -implement `_delegate_property_get`: - - def _delegate_property_get(self, name): - state_property = lambda x: getattr(x, name) - return self.values.apply(state_property) - - -On ocassion, we may want to be able to *set* property being accessed. -This is discouraged, but allowed (as long as the class being accessed -allows the property to be set). Doing so requires implementing -`_delegate_property_set`: - - def _delegate_property_set(self, name, new_values): - for (obj, val) in zip(self.values, new_values): - setattr(obj, name, val) - - -With these implemented, `StateDelegate` knows how to handle methods and -properties. 
We just need to tell it what names and properties it is -supposed to handle. This is done by decorating the `StateDelegate` -class with `pd.accessors.wrap_delegate_names`. We apply the decorator -once with a list of all the methods the accessor should recognize and -once with a list of all the properties the accessor should recognize. - - -@wrap_delegate_names(delegate=State, - accessors=["fips"], - typ="method") -@wrap_delegate_names(delegate=State, - accessors=["abbrev"], - typ="property") -class StateDelegate(PandasDelegate): - [...] - - -We can now pin the `state` accessor to the pd.Series class (we could -alternatively pin it to the pd.Index class with a slightly different -implementation above): - -pd.Series.state = accessors.AccessorProperty(StateDelegate) - - ->>> ser = pd.Series([State('Alabama'), State('California')]) ->>> isinstance(ser.state, StateDelegate) -True - ->>> ser.state.abbrev -0 AL -1 CA -dtype: object - ->>> ser.state.fips() -0 1 -1 6 - ->>> ser.state.abbrev = ['Foo', 'Bar'] ->>> ser.state.abbrev -0 Foo -1 Bar -dtype: object - - - -""" -from pandas.core.base import PandasObject -from pandas.core import common as com - - -class PandasDelegate(PandasObject): - """ an abstract base class for delegating methods/properties + def _make_accessor(cls, data): # pragma: no cover + """ + _make_accessor should implement any necessary validation on the + data argument to ensure that the properties/methods being + accessed will be available. - Usage: To make a custom accessor, start by subclassing `Delegate`. - See example in the module-level docstring. + _make_accessor should return cls(data). If necessary, the arguments + to the constructor can be expanded. In this case, __init__ will + need to be overrided as well. 
- """ + Parameters + ---------- + data : the underlying object being accessed, usually Series or Index - def __init__(self, values): - self.values = values - # #self._freeze() + Returns + ------- + Delegate : instance of PandasDelegate or subclass - @classmethod - def _make_accessor(cls, data): # pragma: no cover + """ raise NotImplementedError( 'It is up to subclasses to implement ' '_make_accessor. This does input validation on the object to ' 'which the accessor is being pinned. ' 'It should return an instance of `cls`.') + # return cls(data) def _delegate_property_get(self, name, *args, **kwargs): raise TypeError("You cannot access the " "property {name}".format(name=name)) def _delegate_property_set(self, name, value, *args, **kwargs): + """ + Overriding _delegate_property_set is discouraged. It is generally + better to directly interact with the underlying data than to + alter it via the accessor. + + An example that ignores this advice can be found in + tests.test_accessors.TestVectorizedAccessor + """ raise TypeError("The property {name} cannot be set".format(name=name)) def _delegate_method(self, name, *args, **kwargs): @@ -242,14 +134,8 @@ def create_delegator_method(name, delegate): def func(self, *args, **kwargs): return self._delegate_method(name, *args, **kwargs) - if callable(name): - # A function/method was passed directly instead of a name - # This may also render the `delegate` arg unnecessary. - func.__name__ = name.__name__ # TODO: is this generally valid? - func.__doc__ = name.__doc__ - else: - func.__name__ = name - func.__doc__ = getattr(delegate, name).__doc__ + func.__name__ = name + func.__doc__ = getattr(delegate, name).__doc__ return func @staticmethod @@ -294,13 +180,10 @@ def add_delegate_accessors(cls): else: func = Delegator.create_delegator_method(name, delegate) - # Allow for a callable to be passed instead of a name. 
- title = com._get_callable_name(name) - title = title or name # don't overwrite existing methods/properties unless # specifically told to do so - if overwrite or not hasattr(cls, title): - setattr(cls, title, func) + if overwrite or not hasattr(cls, name): + setattr(cls, name, func) return cls diff --git a/pandas/tests/test_accessors.py b/pandas/tests/test_accessors.py index 562daae19fb35..3726a74bce452 100644 --- a/pandas/tests/test_accessors.py +++ b/pandas/tests/test_accessors.py @@ -5,14 +5,22 @@ An example/recipe/test for implementing custom accessors. """ +import unittest +import pandas.util.testing as tm import pandas as pd from pandas.core.accessors import (wrap_delegate_names, PandasDelegate, AccessorProperty) +# Example 1: +# An accessor for attributes of custom class in a Series with object dtype. + class State(object): + """ + A dummy class for which only two states have the attributes implemented. + """ def __repr__(self): return repr(self.name) @@ -72,20 +80,80 @@ def _delegate_property_set(self, name, new_values): setattr(obj, name, val) -def test_geo_state_accessor(): - import pandas.util.testing as tm +class TestVectorizedAccessor(unittest.TestCase): + + @classmethod + def setup_class(cls): + pd.Series.state = AccessorProperty(StateDelegate) + + cls.ser = pd.Series([State('Alabama'), State('California')]) + + @classmethod + def teardown_class(cls): + del pd.Series.state + # TODO: is there a nicer way to do this with `mock`? 
+ + def test_method(self): + ser = self.ser + fips = pd.Series([1, 6]) + tm.assert_series_equal(ser.state.fips(), fips) + + def test_property_get(self): + ser = self.ser + abbrev = pd.Series(['AL', 'CA']) + tm.assert_series_equal(ser.state.abbrev, abbrev) + + def test_property_set(self): + ser = self.ser.copy() + + ser.state.abbrev = ['Foo', 'Bar'] + new_abbrev = pd.Series(['Foo', 'Bar']) + tm.assert_series_equal(ser.state.abbrev, new_abbrev) + - pd.Series.state = AccessorProperty(StateDelegate) +@wrap_delegate_names(delegate=pd.Series, + accessors=["real", "imag"], + typ="property") +@wrap_delegate_names(delegate=pd.Series, + accessors=["abs"], + typ="method") +class ForgotToOverride(PandasDelegate): + # A case where the relevant methods were not overridden. Everything + # should raise NotImplementedError or TypeError + @classmethod + def _make_accessor(cls, data): + return cls(data) + + +class TestUnDelegated(unittest.TestCase): + + @classmethod + def setup_class(cls): + pd.Series.forgot = AccessorProperty(ForgotToOverride) + + cls.ser = pd.Series(range(-2, 2)) + + @classmethod + def teardown_class(cls): + del pd.Series.forgot - ser = pd.Series([State('Alabama'), State('California')]) + def test_get_fails(self): + forgot = self.ser.forgot + with self.assertRaises(TypeError): + forgot.real - abbrev = pd.Series(['AL', 'CA']) - tm.assert_series_equal(ser.state.abbrev, abbrev) + with self.assertRaises(TypeError): + forgot.imag - fips = pd.Series([1, 6]) - tm.assert_series_equal(ser.state.fips(), fips) + def test_set_fails(self): + forgot = self.ser.forgot + with self.assertRaises(TypeError): + forgot.real = range(5) - ser.state.abbrev = ['Foo', 'Bar'] + # Check that the underlying hasn't been affected + tm.assert_series_equal(self.ser, pd.Series(range(-2, 2))) - new_abbrev = pd.Series(['Foo', 'Bar']) - tm.assert_series_equal(ser.state.abbrev, new_abbrev) + def test_method_fails(self): + forgot = self.ser.forgot + with self.assertRaises(TypeError): + forgot.abs() From 
dd8315ceae3d42d5266d6a92d6a795162c77aa33 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 22 Jul 2017 10:38:37 -0700 Subject: [PATCH 10/15] Typos, flake8 fixes, rearrange comments --- pandas/core/indexes/category.py | 4 ++-- pandas/core/series.py | 2 +- pandas/core/strings.py | 16 ++++++---------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index bf9551d3d9dd1..99203f993e68c 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -72,8 +72,8 @@ class CategoricalIndex(Index, accessors.PandasDelegate): _attributes = ['name'] def __init__(self, *args, **kwargs): - # Override to prevent accessors.PandasDelegate.__init__ from executing - # This is a kludge. + # Override to prevent accessors.PandasDelegate.__init__ from + # executing pass def __new__(cls, data=None, categories=None, ordered=None, dtype=None, diff --git a/pandas/core/series.py b/pandas/core/series.py index 18a2848389005..35d47c88bb5e8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -113,7 +113,7 @@ def wrapper(self): # Series class -class Series(base.IndexOpsMixin, generic.NDFrame,): +class Series(base.IndexOpsMixin, generic.NDFrame): """ One-dimensional ndarray with axis labels (including time series). diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 2a3abe7f5c6b3..14b839b90bf6b 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1912,16 +1912,6 @@ def rindex(self, sub, start=0, end=None): docstring=_shared_docs['ismethods'] % _shared_docs['isdecimal']) - # TODO: Should we explicitly subclass PandasDelegate to clarify its role, - # even though it isn't actually needed? - # TODO: Use this instead of wrapping all of these methods individually? 
- def _delegate_method(self, name, *args, **kwargs): - # TODO: It would be really nice to keep the signatures - method = getattr(self.values, name) - res = method(*args, **kwargs) - # TODO: Should this get wrapped in an index? - return res - @classmethod def _make_accessor(cls, data): from pandas.core.index import Index @@ -1954,6 +1944,12 @@ def _make_accessor(cls, data): raise AttributeError(message) return StringAccessor(data) + # TODO: Should we explicitly subclass PandasDelegate to clarify its + # role, even though it isn't actually needed? + # _delegate_method is really simple in this case: + # getattr(self.values, name)(*args, **kwargs) + # possibly wrapped in an Index. + StringAccessor = StringMethods # Alias to mirror CategoricalAccessor From 74a237b8fceedb2ca79d4524a7da8624071c2417 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 23 Jul 2017 10:02:32 -0700 Subject: [PATCH 11/15] Simplify categorical make_accessor args --- pandas/core/categorical.py | 8 ++++---- pandas/core/indexes/category.py | 6 +----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 617904de3dced..1703e1599b883 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2055,11 +2055,11 @@ def _make_accessor(cls, values): if not is_categorical_dtype(values.dtype): msg = "Can only use .cat accessor with a 'category' dtype" raise AttributeError(msg) - return CategoricalAccessor(values.values, values.index) + return CategoricalAccessor(values) - def __init__(self, values, index): - self.categorical = values - self.index = index + def __init__(self, values): + self.categorical = values.values + self.index = values.index self._freeze() def _delegate_property_get(self, name): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 99203f993e68c..2c4a4225a3364 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -123,8 +123,6 @@ def 
_create_from_codes(self, codes, categories=None, ordered=None, ------- CategoricalIndex """ - - from pandas.core.categorical import Categorical if categories is None: categories = self.categories if ordered is None: @@ -154,7 +152,6 @@ def _create_categorical(self, data, categories=None, ordered=None): """ if not isinstance(data, ABCCategorical): ordered = False if ordered is None else ordered - from pandas.core.categorical import Categorical data = Categorical(data, categories=categories, ordered=ordered) else: if categories is not None: @@ -403,7 +400,6 @@ def where(self, cond, other=None): other = self._na_value values = np.where(cond, self.values, other) - from pandas.core.categorical import Categorical cat = Categorical(values, categories=self.categories, ordered=self.ordered) @@ -698,6 +694,7 @@ def _evaluate_compare(self, other): cls.__le__ = _make_compare('__le__') cls.__ge__ = _make_compare('__ge__') + # TODO: Can we de-duplicate this with core.categorical Delegate? def _delegate_method(self, name, *args, **kwargs): """ method delegation to the ._values """ method = getattr(self._values, name) @@ -708,7 +705,6 @@ def _delegate_method(self, name, *args, **kwargs): return res return CategoricalIndex(res, name=self.name) - CategoricalIndex._add_numeric_methods_add_sub_disabled() CategoricalIndex._add_numeric_methods_disabled() CategoricalIndex._add_logical_methods_disabled() From c931d4bcccfc3b660af315577aeeb42deab25f8c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 25 Jul 2017 08:33:09 -0700 Subject: [PATCH 12/15] Rename PandasDelegate subclasses FooDelegate --- pandas/core/categorical.py | 6 ++---- pandas/core/indexes/accessors.py | 24 ++++++++++----------- pandas/core/strings.py | 9 ++++---- pandas/tests/series/test_datetime_values.py | 6 +++--- pandas/tests/test_categorical.py | 14 ++++++------ pandas/tests/test_strings.py | 12 +++++------ 6 files changed, 35 insertions(+), 36 deletions(-) diff --git a/pandas/core/categorical.py 
b/pandas/core/categorical.py index 1703e1599b883..562c0aa2a522e 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -2027,7 +2027,7 @@ def repeat(self, repeats, *args, **kwargs): @accessors.wrap_delegate_names(delegate=Categorical, accessors=["categories", "ordered"], typ="property") -class CategoricalAccessor(accessors.PandasDelegate, NoNewAttributesMixin): +class CategoricalDelegate(accessors.PandasDelegate, NoNewAttributesMixin): """ Accessor object for categorical properties of the Series values. @@ -2055,7 +2055,7 @@ def _make_accessor(cls, values): if not is_categorical_dtype(values.dtype): msg = "Can only use .cat accessor with a 'category' dtype" raise AttributeError(msg) - return CategoricalAccessor(values) + return CategoricalDelegate(values) def __init__(self, values): self.categorical = values.values @@ -2075,8 +2075,6 @@ def _delegate_method(self, name, *args, **kwargs): if res is not None: return Series(res, index=self.index) - # TODO: Can we get this from _delegate_property_get? 
- # Would need to get self.index into the result @property def codes(self): from pandas import Series diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 849573371b9e8..b3ed44daa220d 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -63,14 +63,14 @@ def maybe_to_datetimelike(data, copy=False): data = orig.values.categories if is_datetime64_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), + return DatetimeDelegate(DatetimeIndex(data, copy=copy, freq='infer'), index, name=name, orig=orig) elif is_datetime64tz_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer', + return DatetimeDelegate(DatetimeIndex(data, copy=copy, freq='infer', ambiguous='infer'), index, data.name, orig=orig) elif is_timedelta64_dtype(data.dtype): - return TimedeltaProperties(TimedeltaIndex(data, copy=copy, + return TimedeltaDelegate(TimedeltaIndex(data, copy=copy, freq='infer'), index, name=name, orig=orig) else: @@ -78,7 +78,7 @@ def maybe_to_datetimelike(data, copy=False): return PeriodProperties(PeriodIndex(data, copy=copy), index, name=name, orig=orig) if is_datetime_arraylike(data): - return DatetimeProperties(DatetimeIndex(data, copy=copy, + return DatetimeDelegate(DatetimeIndex(data, copy=copy, freq='infer'), index, name=name, orig=orig) @@ -86,7 +86,7 @@ def maybe_to_datetimelike(data, copy=False): "datetimelike index".format(type(data))) -class BaseDatetimeAccessor(accessors.PandasDelegate, NoNewAttributesMixin): +class BaseDatetimeDelegate(accessors.PandasDelegate, NoNewAttributesMixin): def __init__(self, values, index, name, orig=None): self.values = values @@ -107,7 +107,7 @@ def _delegate_property_get(self, name): elif not is_list_like(result): return result elif isinstance(result, DataFrame): - # e.g. TimedeltaProperties.components + # e.g. 
TimedeltaDelegate.components return result.set_index(self.index) result = np.asarray(result) @@ -161,7 +161,7 @@ def _delegate_method(self, name, *args, **kwargs): @accessors.wrap_delegate_names(delegate=DatetimeIndex, accessors=DatetimeIndex._datetimelike_methods, typ='method') -class DatetimeProperties(BaseDatetimeAccessor): +class DatetimeDelegate(BaseDatetimeDelegate): """ Accessor object for datetimelike properties of the Series values. @@ -185,7 +185,7 @@ def to_pydatetime(self): @accessors.wrap_delegate_names(delegate=TimedeltaIndex, accessors=TimedeltaIndex._datetimelike_methods, typ='method') -class TimedeltaProperties(BaseDatetimeAccessor): +class TimedeltaDelegate(BaseDatetimeDelegate): """ Accessor object for datetimelike properties of the Series values. @@ -222,7 +222,7 @@ def components(self): @accessors.wrap_delegate_names(delegate=PeriodIndex, accessors=PeriodIndex._datetimelike_methods, typ='method') -class PeriodProperties(BaseDatetimeAccessor): +class PeriodProperties(BaseDatetimeDelegate): """ Accessor object for datetimelike properties of the Series values. @@ -237,11 +237,11 @@ class PeriodProperties(BaseDatetimeAccessor): """ -class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): +class CombinedDatetimelikeDelegate(DatetimeDelegate, TimedeltaDelegate): # This class is never instantiated, and exists solely for the benefit of # the Series.dt class property. For Series objects, .dt will always be one # of the more specific classes above. 
- __doc__ = DatetimeProperties.__doc__ + __doc__ = DatetimeDelegate.__doc__ @classmethod def _make_accessor(cls, values): @@ -252,5 +252,5 @@ def _make_accessor(cls, values): raise AttributeError(msg) -DatetimeAccessor = CombinedDatetimelikeProperties +DatetimeAccessor = CombinedDatetimelikeDelegate # Alias to mirror CategoricalAccessor diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 14b839b90bf6b..408427723e4d8 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1426,7 +1426,7 @@ def do_copy(target): return do_copy -class StringMethods(NoNewAttributesMixin): +class StringMethods(accessors.PandasDelegate, NoNewAttributesMixin): """ Vectorized string functions for Series and Index. NAs stay NA unless handled otherwise by a particular method. Patterned after Python's string @@ -1942,7 +1942,7 @@ def _make_accessor(cls, data): message = ("Can only use .str accessor with Index, not " "MultiIndex") raise AttributeError(message) - return StringAccessor(data) + return StringDelegate(data) # TODO: Should we explicitly subclass PandasDelegate to clarify its # role, even though it isn't actually needed? @@ -1951,7 +1951,8 @@ def _make_accessor(cls, data): # possibly wrapped in an Index. 
-StringAccessor = StringMethods # Alias to mirror CategoricalAccessor +StringDelegate = StringMethods +# Alias to mirror CategoricalDelegate and CombinedDatetimelikeDelegate # TODO: This is only mixed in to Index (this PR takes it out of Series) @@ -1961,7 +1962,7 @@ def _make_accessor(cls, data): class StringAccessorMixin(object): """ Mixin to add a `.str` acessor to the class.""" - str = accessors.AccessorProperty(StringAccessor) + str = accessors.AccessorProperty(StringDelegate) def _dir_additions(self): return set() diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index e810eadd2dee9..974eb03e1f218 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -364,11 +364,11 @@ def test_valid_dt_with_missing_values(self): def test_dt_accessor_api(self): # GH 9322 from pandas.core.indexes.accessors import ( - CombinedDatetimelikeProperties, DatetimeProperties) - assert Series.dt is CombinedDatetimelikeProperties + CombinedDatetimelikeDelegate, DatetimeDelegate) + assert Series.dt is CombinedDatetimelikeDelegate s = Series(date_range('2000-01-01', periods=3)) - assert isinstance(s.dt, DatetimeProperties) + assert isinstance(s.dt, DatetimeDelegate) for s in [Series(np.arange(5)), Series(list('abcde')), Series(np.random.randn(5))]: diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index c5d0b0f32130c..4d35e887edb28 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -4256,10 +4256,10 @@ def get_dir(s): def test_cat_accessor_api(self): # GH 9322 - from pandas.core.categorical import CategoricalAccessor - assert Series.cat is CategoricalAccessor + from pandas.core.categorical import CategoricalDelegate + assert Series.cat is CategoricalDelegate s = Series(list('aabbcde')).astype('category') - assert isinstance(s.cat, CategoricalAccessor) + assert isinstance(s.cat, CategoricalDelegate) invalid = 
Series([1]) with tm.assert_raises_regex(AttributeError, @@ -4276,11 +4276,11 @@ def test_cat_accessor_no_new_attributes(self): def test_str_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 - from pandas.core.strings import StringAccessor + from pandas.core.strings import StringDelegate s = Series(list('aabb')) s = s + " " + s c = s.astype('category') - assert isinstance(c.str, StringAccessor) + assert isinstance(c.str, StringDelegate) # str functions, which need special arguments special_func_defs = [ @@ -4352,7 +4352,7 @@ def test_str_accessor_api_for_categorical(self): def test_dt_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 - from pandas.core.indexes.accessors import BaseDatetimeAccessor + from pandas.core.indexes.accessors import BaseDatetimeDelegate s_dr = Series(date_range('1/1/2015', periods=5, tz="MET")) c_dr = s_dr.astype("category") @@ -4372,7 +4372,7 @@ def test_dt_accessor_api_for_categorical(self): ("Period", get_ops(PeriodIndex), s_pr, c_pr), ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr)] - assert isinstance(c_dr.dt, BaseDatetimeAccessor) + assert isinstance(c_dr.dt, BaseDatetimeDelegate) special_func_defs = [ ('strftime', ("%Y-%m-%d",), {}), diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 70d21e943ba65..a272a76fe5335 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -24,8 +24,8 @@ class TestStringMethods(object): def test_api(self): # GH 6106, GH 9322 - assert Series.str is strings.StringAccessor - assert isinstance(Series(['']).str, strings.StringAccessor) + assert Series.str is strings.StringDelegate + assert isinstance(Series(['']).str, strings.StringDelegate) # GH 9184 invalid = Series([1]) @@ -2706,14 +2706,14 @@ def test_index_str_accessor_visibility(self): (['aa', datetime(2011, 1, 1)], 'mixed')] for values, tp in cases: idx = Index(values) - assert isinstance(Series(values).str, 
strings.StringAccessor) - assert isinstance(idx.str, strings.StringAccessor) + assert isinstance(Series(values).str, strings.StringDelegate) + assert isinstance(idx.str, strings.StringDelegate) assert idx.inferred_type == tp for values, tp in cases: idx = Index(values) - assert isinstance(Series(values).str, strings.StringAccessor) - assert isinstance(idx.str, strings.StringAccessor) + assert isinstance(Series(values).str, strings.StringDelegate) + assert isinstance(idx.str, strings.StringDelegate) assert idx.inferred_type == tp cases = [([1, np.nan], 'floating'), From 6c771b42642a85bd6aebfe30c836c99e68914b5a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 25 Jul 2017 08:34:21 -0700 Subject: [PATCH 13/15] Revert import rearrangement; update names FooDelegate --- pandas/core/series.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 35d47c88bb5e8..7da7c22f8e498 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -56,25 +56,25 @@ _maybe_box_datetimelike, _dict_compat, standardize_mapping) -from pandas.core.config import get_option +from pandas.core.index import (Index, MultiIndex, InvalidIndexError, + Float64Index, _ensure_index) +from pandas.core.indexing import check_bool_indexer, maybe_convert_indices + from pandas.core.internals import SingleBlockManager -from pandas.core.categorical import Categorical, CategoricalAccessor +from pandas.core.categorical import Categorical, CategoricalDelegate -from pandas.core.indexes.accessors import CombinedDatetimelikeProperties +from pandas.core.indexes.accessors import CombinedDatetimelikeDelegate from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexes.period import PeriodIndex -from pandas.core.index import (Index, MultiIndex, InvalidIndexError, - Float64Index, _ensure_index) -from pandas.core.indexing import check_bool_indexer, 
maybe_convert_indices from pandas.io.formats.terminal import get_terminal_size import pandas.io.formats.format as fmt - from pandas.util._decorators import Appender, deprecate_kwarg, Substitution from pandas.util._validators import validate_bool_kwarg from pandas._libs import index as libindex, tslib as libts, lib, iNaT +from pandas.core.config import get_option __all__ = ['Series'] @@ -148,13 +148,13 @@ class Series(base.IndexOpsMixin, generic.NDFrame): _accessors = frozenset(['dt', 'cat', 'str']) # Datetimelike delegation methods - dt = accessors.AccessorProperty(CombinedDatetimelikeProperties) + dt = accessors.AccessorProperty(CombinedDatetimelikeDelegate) # Categorical methods - cat = accessors.AccessorProperty(CategoricalAccessor) + cat = accessors.AccessorProperty(CategoricalDelegate) # string methods - str = accessors.AccessorProperty(strings.StringAccessor) + str = accessors.AccessorProperty(strings.StringDelegate) def __init__(self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False): From d3a446017d3bef775439b5b041a9d309a8a5c657 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 25 Jul 2017 08:35:21 -0700 Subject: [PATCH 14/15] Deprecate StringAccessorMixin Implement _dir_additions and _dir_deletions in Index --- pandas/core/accessors.py | 13 ++++++------- pandas/core/indexes/base.py | 23 ++++++++++++++++++----- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/pandas/core/accessors.py b/pandas/core/accessors.py index 103321735ce60..7e831c8a485fc 100644 --- a/pandas/core/accessors.py +++ b/pandas/core/accessors.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- from pandas.core.base import PandasObject +from pandas.core.common import AbstractMethodError class PandasDelegate(PandasObject): @@ -48,7 +49,7 @@ def _make_accessor(cls, data): # pragma: no cover Delegate : instance of PandasDelegate or subclass """ - raise NotImplementedError( + raise AbstractMethodError( 'It is up to subclasses to implement 
' '_make_accessor. This does input validation on the object to ' 'which the accessor is being pinned. ' @@ -151,12 +152,10 @@ def _make_accessor(cls, data): [...] - This replaces the older usage in which following a class definition - we would use `Foo._add_delegate_accessors(...)`. The motivation - is that we would like to keep as much of a class's internals inside - the class definition. For things that we cannot keep directly - in the class definition, a decorator is more directly tied to - the definition than a method call outside the definition. + The motivation is that we would like to keep as much of a class's + internals inside the class definition. For things that we cannot + keep directly in the class definition, a decorator is more directly + tied to the definition than a method call outside the definition. """ # Note: we really only need the `delegate` here for the docstring diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c95a9598604ee..3deec8d55d936 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -40,18 +40,15 @@ _asarray_tuplesafe) from pandas.core.base import PandasObject, IndexOpsMixin -import pandas.core.base as base +from pandas.core import base, accessors, missing, sorting, strings from pandas.util._decorators import (Appender, Substitution, cache_readonly, deprecate_kwarg) from pandas.core.indexes.frozen import FrozenList import pandas.core.common as com import pandas.core.dtypes.concat as _concat -import pandas.core.missing as missing import pandas.core.algorithms as algos -import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY -from pandas.core.strings import StringAccessorMixin from pandas.core.config import get_option @@ -97,7 +94,7 @@ def _new_Index(cls, d): return cls.__new__(cls, **d) -class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): +class Index(IndexOpsMixin, PandasObject): """ 
Immutable ndarray implementing an ordered, sliceable set. The basic object storing axis labels for all pandas objects @@ -150,6 +147,22 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): _engine_type = libindex.ObjectEngine + _accessors = frozenset(['dt', 'cat', 'str']) + str = accessors.AccessorProperty(strings.StringDelegate) + + def _dir_deletions(self): + return self._accessors + + def _dir_additions(self): + rv = set() + for accessor in self._accessors: + try: + getattr(self, accessor) + rv.add(accessor) + except AttributeError: + pass + return rv + def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): From 73a0633c3d0fb646cce1d18f15a1098c5e12ea3e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 25 Jul 2017 10:29:11 -0700 Subject: [PATCH 15/15] lint fixes --- pandas/core/indexes/accessors.py | 20 ++++++++++---------- pandas/core/indexes/category.py | 1 + 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index b3ed44daa220d..fbe5a194314e7 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -64,23 +64,23 @@ def maybe_to_datetimelike(data, copy=False): if is_datetime64_dtype(data.dtype): return DatetimeDelegate(DatetimeIndex(data, copy=copy, freq='infer'), - index, name=name, orig=orig) + index, name=name, orig=orig) elif is_datetime64tz_dtype(data.dtype): return DatetimeDelegate(DatetimeIndex(data, copy=copy, freq='infer', - ambiguous='infer'), - index, data.name, orig=orig) + ambiguous='infer'), + index, data.name, orig=orig) elif is_timedelta64_dtype(data.dtype): return TimedeltaDelegate(TimedeltaIndex(data, copy=copy, - freq='infer'), index, - name=name, orig=orig) + freq='infer'), index, + name=name, orig=orig) else: if is_period_arraylike(data): - return PeriodProperties(PeriodIndex(data, copy=copy), index, - name=name, orig=orig) + return 
PeriodDelegate(PeriodIndex(data, copy=copy), index, + name=name, orig=orig) if is_datetime_arraylike(data): return DatetimeDelegate(DatetimeIndex(data, copy=copy, - freq='infer'), index, - name=name, orig=orig) + freq='infer'), index, + name=name, orig=orig) raise TypeError("cannot convert an object of type {0} to a " "datetimelike index".format(type(data))) @@ -222,7 +222,7 @@ def components(self): @accessors.wrap_delegate_names(delegate=PeriodIndex, accessors=PeriodIndex._datetimelike_methods, typ='method') -class PeriodProperties(BaseDatetimeDelegate): +class PeriodDelegate(BaseDatetimeDelegate): """ Accessor object for datetimelike properties of the Series values. diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2c4a4225a3364..0ac8299d7b9d6 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -705,6 +705,7 @@ def _delegate_method(self, name, *args, **kwargs): return res return CategoricalIndex(res, name=self.name) + CategoricalIndex._add_numeric_methods_add_sub_disabled() CategoricalIndex._add_numeric_methods_disabled() CategoricalIndex._add_logical_methods_disabled()