From 5e640400523b4882dd771de13cc9b0bf3ebef077 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 13 Jul 2018 15:38:25 -0700 Subject: [PATCH 1/4] Change ._data to ._parent for accessors to make it easier to identify BlockManager lookups --- pandas/core/indexes/accessors.py | 4 +- pandas/core/strings.py | 69 +++++++++++++++++--------------- pandas/plotting/_core.py | 6 +-- 3 files changed, 41 insertions(+), 38 deletions(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index d7b4ea63cd48c..6ab8c4659c31e 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -27,14 +27,14 @@ def __init__(self, data, orig): raise TypeError("cannot convert an object of type {0} to a " "datetimelike index".format(type(data))) - self.values = data + self._parent = data self.orig = orig self.name = getattr(data, 'name', None) self.index = getattr(data, 'index', None) self._freeze() def _get_values(self): - data = self.values + data = self._parent if is_datetime64_dtype(data.dtype): return DatetimeIndex(data, copy=False, name=self.name) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index e4765c00f80fd..11093c0330c67 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -923,7 +923,7 @@ def str_extract(arr, pat, flags=0, expand=True): if expand: return _str_extract_frame(arr._orig, pat, flags=flags) else: - result, name = _str_extract_noexpand(arr._data, pat, flags=flags) + result, name = _str_extract_noexpand(arr._parent, pat, flags=flags) return arr._wrap_result(result, name=name, expand=expand) @@ -1717,7 +1717,7 @@ def str_encode(arr, encoding, errors="strict"): def _noarg_wrapper(f, docstring=None, **kargs): def wrapper(self): - result = _na_map(f, self._data, **kargs) + result = _na_map(f, self._parent, **kargs) return self._wrap_result(result) wrapper.__name__ = f.__name__ @@ -1731,15 +1731,15 @@ def wrapper(self): def _pat_wrapper(f, flags=False, na=False, **kwargs): def wrapper1(self, 
pat): - result = f(self._data, pat) + result = f(self._parent, pat) return self._wrap_result(result) def wrapper2(self, pat, flags=0, **kwargs): - result = f(self._data, pat, flags=flags, **kwargs) + result = f(self._parent, pat, flags=flags, **kwargs) return self._wrap_result(result) def wrapper3(self, pat, na=np.nan): - result = f(self._data, pat, na=na) + result = f(self._parent, pat, na=na) return self._wrap_result(result) wrapper = wrapper3 if na else wrapper2 if flags else wrapper1 @@ -1779,7 +1779,7 @@ def __init__(self, data): self._is_categorical = is_categorical_dtype(data) # .values.categories works for both Series/Index - self._data = data.values.categories if self._is_categorical else data + self._parent = data.values.categories if self._is_categorical else data # save orig to blow up categoricals to the right type self._orig = data self._freeze() @@ -2329,14 +2329,14 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): 'side': 'beginning', 'method': 'split'}) def split(self, pat=None, n=-1, expand=False): - result = str_split(self._data, pat, n=n) + result = str_split(self._parent, pat, n=n) return self._wrap_result(result, expand=expand) @Appender(_shared_docs['str_split'] % { 'side': 'end', 'method': 'rsplit'}) def rsplit(self, pat=None, n=-1, expand=False): - result = str_rsplit(self._data, pat, n=n) + result = str_rsplit(self._parent, pat, n=n) return self._wrap_result(result, expand=expand) _shared_docs['str_partition'] = (""" @@ -2427,7 +2427,7 @@ def rsplit(self, pat=None, n=-1, expand=False): }) def partition(self, pat=' ', expand=True): f = lambda x: x.partition(pat) - result = _na_map(f, self._data) + result = _na_map(f, self._parent) return self._wrap_result(result, expand=expand) @Appender(_shared_docs['str_partition'] % { @@ -2438,45 +2438,45 @@ def partition(self, pat=' ', expand=True): }) def rpartition(self, pat=' ', expand=True): f = lambda x: x.rpartition(pat) - result = _na_map(f, self._data) + result = _na_map(f, 
self._parent) return self._wrap_result(result, expand=expand) @copy(str_get) def get(self, i): - result = str_get(self._data, i) + result = str_get(self._parent, i) return self._wrap_result(result) @copy(str_join) def join(self, sep): - result = str_join(self._data, sep) + result = str_join(self._parent, sep) return self._wrap_result(result) @copy(str_contains) def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): - result = str_contains(self._data, pat, case=case, flags=flags, na=na, + result = str_contains(self._parent, pat, case=case, flags=flags, na=na, regex=regex) return self._wrap_result(result) @copy(str_match) def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=None): - result = str_match(self._data, pat, case=case, flags=flags, na=na, + result = str_match(self._parent, pat, case=case, flags=flags, na=na, as_indexer=as_indexer) return self._wrap_result(result) @copy(str_replace) def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): - result = str_replace(self._data, pat, repl, n=n, case=case, + result = str_replace(self._parent, pat, repl, n=n, case=case, flags=flags, regex=regex) return self._wrap_result(result) @copy(str_repeat) def repeat(self, repeats): - result = str_repeat(self._data, repeats) + result = str_repeat(self._parent, repeats) return self._wrap_result(result) @copy(str_pad) def pad(self, width, side='left', fillchar=' '): - result = str_pad(self._data, width, side=side, fillchar=fillchar) + result = str_pad(self._parent, width, side=side, fillchar=fillchar) return self._wrap_result(result) _shared_docs['str_pad'] = (""" @@ -2569,27 +2569,27 @@ def zfill(self, width): 4 NaN dtype: object """ - result = str_pad(self._data, width, side='left', fillchar='0') + result = str_pad(self._parent, width, side='left', fillchar='0') return self._wrap_result(result) @copy(str_slice) def slice(self, start=None, stop=None, step=None): - result = str_slice(self._data, start, stop, step) + result = 
str_slice(self._parent, start, stop, step) return self._wrap_result(result) @copy(str_slice_replace) def slice_replace(self, start=None, stop=None, repl=None): - result = str_slice_replace(self._data, start, stop, repl) + result = str_slice_replace(self._parent, start, stop, repl) return self._wrap_result(result) @copy(str_decode) def decode(self, encoding, errors="strict"): - result = str_decode(self._data, encoding, errors) + result = str_decode(self._parent, encoding, errors) return self._wrap_result(result) @copy(str_encode) def encode(self, encoding, errors="strict"): - result = str_encode(self._data, encoding, errors) + result = str_encode(self._parent, encoding, errors) return self._wrap_result(result) _shared_docs['str_strip'] = (r""" @@ -2658,38 +2658,38 @@ def encode(self, encoding, errors="strict"): @Appender(_shared_docs['str_strip'] % dict(side='left and right sides', method='strip')) def strip(self, to_strip=None): - result = str_strip(self._data, to_strip, side='both') + result = str_strip(self._parent, to_strip, side='both') return self._wrap_result(result) @Appender(_shared_docs['str_strip'] % dict(side='left side', method='lstrip')) def lstrip(self, to_strip=None): - result = str_strip(self._data, to_strip, side='left') + result = str_strip(self._parent, to_strip, side='left') return self._wrap_result(result) @Appender(_shared_docs['str_strip'] % dict(side='right side', method='rstrip')) def rstrip(self, to_strip=None): - result = str_strip(self._data, to_strip, side='right') + result = str_strip(self._parent, to_strip, side='right') return self._wrap_result(result) @copy(str_wrap) def wrap(self, width, **kwargs): - result = str_wrap(self._data, width, **kwargs) + result = str_wrap(self._parent, width, **kwargs) return self._wrap_result(result) @copy(str_get_dummies) def get_dummies(self, sep='|'): # we need to cast to Series of strings as only that has all # methods available for making the dummies... 
- data = self._orig.astype(str) if self._is_categorical else self._data + data = self._orig.astype(str) if self._is_categorical else self._parent result, name = str_get_dummies(data, sep) return self._wrap_result(result, use_codes=(not self._is_categorical), name=name, expand=True) @copy(str_translate) def translate(self, table, deletechars=None): - result = str_translate(self._data, table, deletechars) + result = str_translate(self._parent, table, deletechars) return self._wrap_result(result) count = _pat_wrapper(str_count, flags=True) @@ -2732,14 +2732,15 @@ def extractall(self, pat, flags=0): dict(side='lowest', method='find', also='rfind : Return highest indexes in each strings')) def find(self, sub, start=0, end=None): - result = str_find(self._data, sub, start=start, end=end, side='left') + result = str_find(self._parent, sub, start=start, end=end, side='left') return self._wrap_result(result) @Appender(_shared_docs['find'] % dict(side='highest', method='rfind', also='find : Return lowest indexes in each strings')) def rfind(self, sub, start=0, end=None): - result = str_find(self._data, sub, start=start, end=end, side='right') + result = str_find(self._parent, sub, + start=start, end=end, side='right') return self._wrap_result(result) def normalize(self, form): @@ -2758,7 +2759,7 @@ def normalize(self, form): """ import unicodedata f = lambda x: unicodedata.normalize(form, compat.u_safe(x)) - result = _na_map(f, self._data) + result = _na_map(f, self._parent) return self._wrap_result(result) _shared_docs['index'] = (""" @@ -2789,14 +2790,16 @@ def normalize(self, form): dict(side='lowest', similar='find', method='index', also='rindex : Return highest indexes in each strings')) def index(self, sub, start=0, end=None): - result = str_index(self._data, sub, start=start, end=end, side='left') + result = str_index(self._parent, sub, + start=start, end=end, side='left') return self._wrap_result(result) @Appender(_shared_docs['index'] % dict(side='highest', 
similar='rfind', method='rindex', also='index : Return lowest indexes in each strings')) def rindex(self, sub, start=0, end=None): - result = str_index(self._data, sub, start=start, end=end, side='right') + result = str_index(self._parent, sub, + start=start, end=end, side='right') return self._wrap_result(result) _shared_docs['len'] = (""" diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 06020bdfd5d1d..2f40b140bf87d 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2727,7 +2727,7 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, class BasePlotMethods(PandasObject): def __init__(self, data): - self._data = data + self._parent = data # either Series or DataFrame def __call__(self, *args, **kwargs): raise NotImplementedError @@ -2755,7 +2755,7 @@ def __call__(self, kind='line', ax=None, rot=None, fontsize=None, colormap=None, table=False, yerr=None, xerr=None, label=None, secondary_y=False, **kwds): - return plot_series(self._data, kind=kind, ax=ax, figsize=figsize, + return plot_series(self._parent, kind=kind, ax=ax, figsize=figsize, use_index=use_index, title=title, grid=grid, legend=legend, style=style, logx=logx, logy=logy, loglog=loglog, xticks=xticks, yticks=yticks, @@ -2954,7 +2954,7 @@ def __call__(self, x=None, y=None, kind='line', ax=None, rot=None, fontsize=None, colormap=None, table=False, yerr=None, xerr=None, secondary_y=False, sort_columns=False, **kwds): - return plot_frame(self._data, kind=kind, x=x, y=y, ax=ax, + return plot_frame(self._parent, kind=kind, x=x, y=y, ax=ax, subplots=subplots, sharex=sharex, sharey=sharey, layout=layout, figsize=figsize, use_index=use_index, title=title, grid=grid, legend=legend, style=style, From 9c96cf002ebffc6da8876b5ee1417d71a9330552 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 3 Aug 2018 21:34:36 -0700 Subject: [PATCH 2/4] dummy commit to force CI --- pandas/plotting/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pandas/plotting/_core.py b/pandas/plotting/_core.py index a3b77a48341c7..4855a0ee9455b 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2727,7 +2727,7 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, class BasePlotMethods(PandasObject): def __init__(self, data): - self._parent = data # either Series or DataFrame + self._parent = data # Series or DataFrame def __call__(self, *args, **kwargs): raise NotImplementedError From d4f7afa3f869507c59a0a53ce1424ebd954a2fef Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 5 Aug 2018 15:24:36 -0700 Subject: [PATCH 3/4] dummy commit to force CI --- pandas/plotting/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 4855a0ee9455b..e81b162645b94 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2727,7 +2727,7 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, class BasePlotMethods(PandasObject): def __init__(self, data): - self._parent = data # Series or DataFrame + self._parent = data # can be Series or DataFrame def __call__(self, *args, **kwargs): raise NotImplementedError From 4ece5b3aed1f65a66ba0ae6f89893665b188c332 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 10:27:36 -0700 Subject: [PATCH 4/4] Change categorical-->_parent for CategoricalAccessor --- pandas/core/arrays/categorical.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index eebdfe8a54a9d..003ba7608dea4 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2401,7 +2401,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): def __init__(self, data): self._validate(data) - self.categorical = data.values + self._parent = data.values self.index = data.index self.name = data.name self._freeze() @@ -2413,19 +2413,19 @@ 
def _validate(data): "'category' dtype") def _delegate_property_get(self, name): - return getattr(self.categorical, name) + return getattr(self._parent, name) def _delegate_property_set(self, name, new_values): - return setattr(self.categorical, name, new_values) + return setattr(self._parent, name, new_values) @property def codes(self): from pandas import Series - return Series(self.categorical.codes, index=self.index) + return Series(self._parent.codes, index=self.index) def _delegate_method(self, name, *args, **kwargs): from pandas import Series - method = getattr(self.categorical, name) + method = getattr(self._parent, name) res = method(*args, **kwargs) if res is not None: return Series(res, index=self.index, name=self.name)