From d1311fdb5478849c6c525eda69632cc81468f7cf Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 8 Feb 2020 09:00:07 -0800
Subject: [PATCH 1/4] CLN: avoid private indexer usage

---
 pandas/_libs/reduction.pyx |  2 +-
 pandas/core/frame.py       |  3 ++-
 pandas/core/groupby/ops.py | 12 ++++++++----
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 43d253f632f0f..5308b0c9419e8 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -310,7 +310,7 @@ cdef class SeriesGrouper(_BaseGrouper):
                  Py_ssize_t ngroups, object dummy):
 
         # in practice we always pass either obj[:0] or the
-        #  safer obj._get_values(slice(None, 0))
+        #  safer obj.iloc[:0]
         assert dummy is not None
 
         if len(series) == 0:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e0efa93379bca..4dbe11729f503 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2777,7 +2777,8 @@ def __getitem__(self, key):
         if indexer is not None:
             # either we have a slice or we have a string that can be converted
             #  to a slice for partial-string date indexing
-            return self._slice(indexer, axis=0)
+            return self.iloc[indexer]
+            #return self._slice(indexer, axis=0)
 
         # Do we have a (boolean) DataFrame?
         if isinstance(key, DataFrame):
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 761353ca5a6ca..7c9a57e5d8653 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -658,7 +658,7 @@ def _aggregate_series_fast(self, obj: Series, func):
         group_index, _, ngroups = self.group_info

         # avoids object / Series creation overhead
-        dummy = obj._get_values(slice(None, 0))
+        dummy = obj.iloc[:0]
         indexer = get_group_index_sorter(group_index, ngroups)
         obj = obj.take(indexer)
         group_index = algorithms.take_nd(group_index, indexer, allow_fill=False)
@@ -780,7 +780,11 @@ def get_iterator(self, data: FrameOrSeries, axis: int = 0):
         Generator yielding sequence of (name, subsetted object)
         for each group
         """
-        slicer = lambda start, edge: data._slice(slice(start, edge), axis=axis)
+        if axis == 0:
+            slicer = lambda start, edge: data.iloc[start:edge]
+        else:
+            slicer = lambda start, edge: data.iloc[:, start:edge]
+        #slicer = lambda start, edge: data._slice(slice(start, edge), axis=axis)
         length = len(data.axes[axis])

         start = 0
@@ -919,7 +923,7 @@ def _chop(self, sdata, slice_obj: slice) -> NDFrame:

 class SeriesSplitter(DataSplitter):
     def _chop(self, sdata: Series, slice_obj: slice) -> Series:
-        return sdata._get_values(slice_obj)
+        return sdata.iloc[slice_obj]


 class FrameSplitter(DataSplitter):
@@ -934,7 +938,7 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
         if self.axis == 0:
             return sdata.iloc[slice_obj]
         else:
-            return sdata._slice(slice_obj, axis=1)
+            return sdata.iloc[:, slice_obj]


 def get_splitter(data: FrameOrSeries, *args, **kwargs) -> DataSplitter:

From 9dd2110e76bba46b833fcea07c8ac86ede5566f4 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 8 Feb 2020 11:30:40 -0800
Subject: [PATCH 2/4] remove commented-out

---
 pandas/core/frame.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 4dbe11729f503..8cdb9bb616d69 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2778,7 +2778,6 @@ def __getitem__(self, key):
             # either we have a slice or we have a string that can be converted
             #  to a slice for partial-string date indexing
             return self.iloc[indexer]
-            #return self._slice(indexer, axis=0)

         # Do we have a (boolean) DataFrame?
         if isinstance(key, DataFrame):

From ff89ea248bdbb8ee99659f2235548d8795630144 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 8 Feb 2020 11:33:57 -0800
Subject: [PATCH 3/4] standardize on obj.iloc[:0]

---
 pandas/_libs/reduction.pyx               | 3 +--
 pandas/tests/groupby/test_bin_groupby.py | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 5308b0c9419e8..b27072aa66708 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -309,8 +309,7 @@ cdef class SeriesGrouper(_BaseGrouper):
     def __init__(self, object series, object f, object labels,
                  Py_ssize_t ngroups, object dummy):

-        # in practice we always pass either obj[:0] or the
-        #  safer obj.iloc[:0]
+        # in practice we always pass obj.iloc[:0] or equivalent
         assert dummy is not None

         if len(series) == 0:
diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
index ad71f73e80e64..ff74d374e5e3f 100644
--- a/pandas/tests/groupby/test_bin_groupby.py
+++ b/pandas/tests/groupby/test_bin_groupby.py
@@ -11,7 +11,7 @@


 def test_series_grouper():
     obj = Series(np.random.randn(10))
-    dummy = obj[:0]
+    dummy = obj.iloc[:0]

     labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64)
@@ -28,7 +28,7 @@
 def test_series_grouper_requires_nonempty_raises():
     # GH#29500
     obj = Series(np.random.randn(10))
-    dummy = obj[:0]
+    dummy = obj.iloc[:0]
     labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64)

     with pytest.raises(ValueError, match="SeriesGrouper requires non-empty `series`"):

From 77601ec2c8deeed637edfb98e03e9d5169a8dd75 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 8 Feb 2020 18:50:31 -0800
Subject: [PATCH 4/4] cleanup

---
 pandas/core/frame.py       | 2 +-
 pandas/core/groupby/ops.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8cdb9bb616d69..e0efa93379bca 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2777,7 +2777,7 @@ def __getitem__(self, key):
         if indexer is not None:
             # either we have a slice or we have a string that can be converted
             #  to a slice for partial-string date indexing
-            return self.iloc[indexer]
+            return self._slice(indexer, axis=0)

         # Do we have a (boolean) DataFrame?
         if isinstance(key, DataFrame):
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 7c9a57e5d8653..4e593ce543ea6 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -784,7 +784,7 @@ def get_iterator(self, data: FrameOrSeries, axis: int = 0):
             slicer = lambda start, edge: data.iloc[start:edge]
         else:
             slicer = lambda start, edge: data.iloc[:, start:edge]
-        #slicer = lambda start, edge: data._slice(slice(start, edge), axis=axis)
+
         length = len(data.axes[axis])

         start = 0
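
Not part of the patches above: a minimal sketch of the public-indexer equivalents this series standardizes on. The variable names (obj, df, rows, cols) are illustrative only and do not appear in the PR.

# Illustration of the public .iloc equivalents used in place of the
# private _get_values / _slice helpers removed by this patch series.
import numpy as np
from pandas import DataFrame, Series

obj = Series(np.random.randn(10))

# Empty prefix of a Series: obj.iloc[:0] replaces the private
# obj._get_values(slice(None, 0)) previously used as the SeriesGrouper dummy.
dummy = obj.iloc[:0]
assert len(dummy) == 0 and dummy.dtype == obj.dtype

df = DataFrame(np.random.randn(4, 6))

# Positional row slice, the public counterpart of df._slice(slice(0, 2), axis=0)
rows = df.iloc[0:2]

# Positional column slice, the public counterpart of df._slice(slice(1, 3), axis=1)
cols = df.iloc[:, 1:3]

assert rows.shape == (2, 6)
assert cols.shape == (4, 2)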