CLN: Prune unnecessary indexing code #27576


Merged: 12 commits, Jul 25, 2019
10 changes: 3 additions & 7 deletions pandas/core/frame.py
@@ -94,12 +94,9 @@
)
from pandas.core.indexes import base as ibase
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.multi import maybe_droplevels
from pandas.core.indexes.period import PeriodIndex
from pandas.core.indexing import (
check_bool_indexer,
convert_to_index_sliceable,
maybe_droplevels,
)
from pandas.core.indexing import check_bool_indexer, convert_to_index_sliceable
from pandas.core.internals import BlockManager
from pandas.core.internals.construction import (
arrays_to_mgr,
@@ -2794,8 +2791,6 @@ def _ixs(self, i: int, axis: int = 0):
if axis == 0:
label = self.index[i]
new_values = self._data.fast_xs(i)
if is_scalar(new_values):
return new_values

# if we are a copy, mark as such
copy = isinstance(new_values, np.ndarray) and new_values.base is None
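A note on the deleted is_scalar branch: for a DataFrame, the block manager's fast_xs returns an array-like cross-section rather than a bare scalar, so the branch appears to have been dead code (this reading is inferred from the diff, not stated in the thread). A minimal sketch of the code path, assuming a pandas build of this era:

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]})
row = df._ixs(0)           # private positional row access behind .iloc
print(type(row).__name__)  # Series, never a bare scalar, so the removed
                           # `if is_scalar(new_values)` check had nothing to catch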
@@ -2906,6 +2901,7 @@ def _getitem_bool_array(self, key):
return self.take(indexer, axis=0)

def _getitem_multilevel(self, key):
# self.columns is a MultiIndex
loc = self.columns.get_loc(key)
if isinstance(loc, (slice, Series, np.ndarray, Index)):
new_columns = self.columns[loc]
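For context on _getitem_multilevel (and the maybe_droplevels helper it calls, relocated below): selecting a single top-level key from MultiIndex columns drops the matched level from the result. A short sketch of the observable behavior:

import numpy as np
import pandas as pd

df = pd.DataFrame(
    np.arange(6).reshape(2, 3),
    columns=pd.MultiIndex.from_tuples([("a", "x"), ("a", "y"), ("b", "x")]),
)
sub = df["a"]             # goes through DataFrame._getitem_multilevel
print(list(sub.columns))  # ['x', 'y'] -- the matched 'a' level is dropped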
47 changes: 22 additions & 25 deletions pandas/core/generic.py
@@ -3296,11 +3296,8 @@ def _maybe_update_cacher(self, clear=False, verify_is_copy=True):
if clear:
self._clear_item_cache()

def _clear_item_cache(self, i=None):
if i is not None:
self._item_cache.pop(i, None)
else:
self._item_cache.clear()
def _clear_item_cache(self):
self._item_cache.clear()

# ----------------------------------------------------------------------
# Indexing Methods
@@ -3559,27 +3556,8 @@ class animal locomotion

_xs = xs # type: Callable

def get(self, key, default=None):
"""
Get item from object for given key (ex: DataFrame column).

Returns default value if not found.

Parameters
----------
key : object

Returns
-------
value : same type as items contained in object
"""
try:
return self[key]
except (KeyError, ValueError, IndexError):
return default

def __getitem__(self, item):
return self._get_item_cache(item)
raise AbstractMethodError(self)

def _get_item_cache(self, item):
"""Return the cached item, item represents a label indexer."""
@@ -3770,6 +3748,25 @@ def __delitem__(self, key):
# ----------------------------------------------------------------------
# Unsorted

def get(self, key, default=None):
"""
Get item from object for given key (ex: DataFrame column).

Returns default value if not found.

Parameters
----------
key : object

Returns
-------
value : same type as items contained in object
"""
try:
return self[key]
except (KeyError, ValueError, IndexError):
return default

@property
def _is_view(self):
"""Return boolean indicating if self is view of another array """
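The get method is moved verbatim into the "Unsorted" section, with __getitem__ left abstract on NDFrame; behavior is unchanged. For reference:

import pandas as pd

df = pd.DataFrame({"a": [1, 2]})
print(df.get("a"))       # same as df["a"]
print(df.get("z", -1))   # missing key falls back to the default: -1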
49 changes: 39 additions & 10 deletions pandas/core/indexes/multi.py
@@ -1470,9 +1470,6 @@ def dropna(self, how="any"):
return self.copy(codes=new_codes, deep=True)

def get_value(self, series, key):
# somewhat broken encapsulation
from pandas.core.indexing import maybe_droplevels

# Label-based
s = com.values_from_object(series)
k = com.values_from_object(key)
@@ -2709,7 +2706,7 @@ def _maybe_to_slice(loc):

return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop)

def get_loc_level(self, key, level=0, drop_level=True):
def get_loc_level(self, key, level=0, drop_level: bool = True):
"""
Get both the location for the requested label(s) and the
resulting sliced index.
@@ -2750,7 +2747,8 @@ def get_loc_level(self, key, level=0, drop_level=True):
(1, None)
"""

def maybe_droplevels(indexer, levels, drop_level):
# different name to distinguish from maybe_droplevels
def maybe_mi_droplevels(indexer, levels, drop_level: bool):
Contributor: in future maybe pull this out to a module level function

if not drop_level:
return self[indexer]
# kludgearound
@@ -2780,7 +2778,7 @@ def maybe_droplevels(indexer, levels, drop_level):

result = loc if result is None else result & loc

return result, maybe_droplevels(result, level, drop_level)
return result, maybe_mi_droplevels(result, level, drop_level)

level = self._get_level_number(level)

@@ -2793,7 +2791,7 @@ def maybe_droplevels(indexer, levels, drop_level):
try:
if key in self.levels[0]:
indexer = self._get_level_indexer(key, level=level)
new_index = maybe_droplevels(indexer, [0], drop_level)
new_index = maybe_mi_droplevels(indexer, [0], drop_level)
return indexer, new_index
except TypeError:
pass
@@ -2808,7 +2806,7 @@ def partial_selection(key, indexer=None):
ilevels = [
i for i in range(len(key)) if key[i] != slice(None, None)
]
return indexer, maybe_droplevels(indexer, ilevels, drop_level)
return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level)

if len(key) == self.nlevels and self.is_unique:
# Complete key in unique index -> standard get_loc
@@ -2843,10 +2841,10 @@ def partial_selection(key, indexer=None):
if indexer is None:
indexer = slice(None, None)
ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
return indexer, maybe_droplevels(indexer, ilevels, drop_level)
return indexer, maybe_mi_droplevels(indexer, ilevels, drop_level)
else:
indexer = self._get_level_indexer(key, level=level)
return indexer, maybe_droplevels(indexer, [level], drop_level)
return indexer, maybe_mi_droplevels(indexer, [level], drop_level)

def _get_level_indexer(self, key, level=0, indexer=None):
# return an indexer, boolean array or a slice showing where the key is
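A usage sketch for get_loc_level, adapted from the method's own docstring (the printed results are taken from that docstring, so treat them as illustrative):

import pandas as pd

mi = pd.MultiIndex.from_arrays([list("abb"), list("def")], names=["A", "B"])

loc, idx = mi.get_loc_level("b")      # location plus index with matched levels dropped
print(loc)                            # slice(1, 3, None)
print(idx)                            # Index(['e', 'f'], dtype='object', name='B')

print(mi.get_loc_level(["b", "e"]))   # complete key: (1, None)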
@@ -3464,3 +3462,34 @@ def _sparsify(label_list, start=0, sentinel=""):

def _get_na_rep(dtype):
return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN")


def maybe_droplevels(index, key):
Contributor: can you add a doc-string

Member Author: sure

"""
Attempt to drop level or levels from the given index.

Parameters
----------
index : Index
key : scalar or tuple

Returns
-------
Index
"""
# drop levels
original_index = index
if isinstance(key, tuple):
for _ in key:
try:
index = index.droplevel(0)
except ValueError:
# we have dropped too much, so back out
return original_index
else:
try:
index = index.droplevel(0)
except ValueError:
pass

return index
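Since the relocated function is shown in full above, its contract can be illustrated directly; note the back-out branch when a tuple key would drop more levels than the index has:

import pandas as pd
from pandas.core.indexes.multi import maybe_droplevels  # new home after this PR

mi = pd.MultiIndex.from_arrays([["a", "a"], ["x", "y"]])

print(maybe_droplevels(mi, "a"))         # scalar key: one drop -> Index(['x', 'y'])
print(maybe_droplevels(mi, ("a",)))      # 1-tuple: likewise a single drop
print(maybe_droplevels(mi, ("a", "x")))  # second drop raises ValueError internally,
                                         # so the original MultiIndex is returned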
70 changes: 32 additions & 38 deletions pandas/core/indexing.py
@@ -226,7 +226,7 @@ def _validate_key(self, key, axis: int):
def _has_valid_tuple(self, key: Tuple):
""" check the key for valid keys across my indexer """
for i, k in enumerate(key):
if i >= self.obj.ndim:
if i >= self.ndim:
raise IndexingError("Too many indexers")
try:
self._validate_key(k, i)
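The self.obj.ndim -> self.ndim change throughout this file assumes an ndim accessor on the shared base class; _NDFrameIndexerBase is implemented in pandas/_libs/indexing.pyx, so the following pure-Python stand-in is an assumption about its shape, not a copy of it:

class NDFrameIndexerBaseSketch:
    """Hypothetical stand-in for _NDFrameIndexerBase."""

    def __init__(self, name, obj):
        self.name = name
        self.obj = obj      # the DataFrame/Series being indexed
        self._ndim = None

    @property
    def ndim(self):
        # cache obj.ndim so repeated "too many indexers" checks stay cheap
        if self._ndim is None:
            self._ndim = self.obj.ndim
        return self._ndim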
@@ -254,7 +254,7 @@ def _convert_tuple(self, key, is_setter: bool = False):
keyidx.append(slice(None))
else:
for i, k in enumerate(key):
if i >= self.obj.ndim:
if i >= self.ndim:
raise IndexingError("Too many indexers")
idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter)
keyidx.append(idx)
@@ -286,7 +286,7 @@ def _has_valid_positional_setitem_indexer(self, indexer):
raise IndexError("{0} cannot enlarge its target object".format(self.name))
else:
if not isinstance(indexer, tuple):
indexer = self._tuplify(indexer)
indexer = _tuplify(self.ndim, indexer)
for ax, i in zip(self.obj.axes, indexer):
if isinstance(i, slice):
# should check the stop slice?
@@ -401,7 +401,7 @@ def _setitem_with_indexer(self, indexer, value):
assert info_axis == 1

if not isinstance(indexer, tuple):
indexer = self._tuplify(indexer)
indexer = _tuplify(self.ndim, indexer)

if isinstance(value, ABCSeries):
value = self._align_series(indexer, value)
@@ -670,7 +670,7 @@ def ravel(i):
aligners = [not com.is_null_slice(idx) for idx in indexer]
sum_aligners = sum(aligners)
single_aligner = sum_aligners == 1
is_frame = self.obj.ndim == 2
is_frame = self.ndim == 2
obj = self.obj

# are we a single alignable value on a non-primary
@@ -731,7 +731,7 @@ def ravel(i):
raise ValueError("Incompatible indexer with Series")

def _align_frame(self, indexer, df: ABCDataFrame):
is_frame = self.obj.ndim == 2
is_frame = self.ndim == 2

if isinstance(indexer, tuple):

@@ -867,7 +867,7 @@ def _handle_lowerdim_multi_index_axis0(self, tup: Tuple):
except KeyError as ek:
# raise KeyError if number of indexers match
# else IndexingError will be raised
if len(tup) <= self.obj.index.nlevels and len(tup) > self.obj.ndim:
if len(tup) <= self.obj.index.nlevels and len(tup) > self.ndim:
raise ek
except Exception as e1:
if isinstance(tup[0], (slice, Index)):
@@ -900,7 +900,7 @@ def _getitem_lowerdim(self, tup: Tuple):
if result is not None:
return result

if len(tup) > self.obj.ndim:
if len(tup) > self.ndim:
raise IndexingError("Too many indexers. handle elsewhere")

# to avoid wasted computation
@@ -1274,11 +1274,6 @@ def _convert_to_indexer(
return {"key": obj}
raise

def _tuplify(self, loc):
tup = [slice(None, None) for _ in range(self.ndim)]
tup[0] = loc
return tuple(tup)

def _get_slice_axis(self, slice_obj: slice, axis: int):
# caller is responsible for ensuring non-None axis
obj = self.obj
@@ -2077,6 +2072,8 @@ def _getitem_tuple(self, tup: Tuple):

# if the dim was reduced, then pass a lower-dim the next time
if retval.ndim < self.ndim:
# TODO: this is never reached in tests; can we confirm that
# it is impossible?
Member Author: @toobaz any thoughts on showing this is unreachable? If we can confirm this is the case, then this chunk of the code can be shared with the other _getitem_tuple method above

(pinging you since you wrote the wiki page on simplifying this code)

Member Author: looks like this also wasn't hit in 0.24.2

@ghost (Jul 25, 2019): And for context, an example case which triggers on that commit but not on master:
import numpy as np
from pandas import *


mi_int = DataFrame(
    np.random.randn(3, 3),
    columns=[[2, 2, 4], [6, 8, 10]],
    index=[[4, 4, 8], [8, 10, 12]],
)

# this triggers the code path when testing against 9f0dc3befb
rs = mi_int.iloc[:, 2]  

Member: I don't think that path is ever reached. It should be reached by something like df.loc[1, [0, 1]], which however gets captured in the call above to self._getitem_lowerdim(tup) (which doesn't make sense, because it is named "lowerdim", but then it resorts to _is_nested_tuple_indexer). So to sum up: I don't know exactly what the aim of that axis -= 1 is, I don't think it is ever reached, but I think it should be reached in principle.

Member: (but no major objections to removing for now)

Member Author: @pilkibun thanks for tracking that down

@toobaz how about we fix getitem_lowerdim to make sense before removing this check. simplifying this code is going to take a few passes

Member Author: @pilkibun there is another un-hit block on lines 440-450. any idea what that was intended to catch?

Contributor: this was related to Panel indexing, can probably go.

Member: > @toobaz how about we fix getitem_lowerdim to make sense before removing this check. simplifying this code is going to take a few passes

No objection

axis -= 1

# try to get for the next axis
@@ -2152,7 +2149,7 @@ def _convert_to_indexer(
)


class _ScalarAccessIndexer(_NDFrameIndexer):
class _ScalarAccessIndexer(_NDFrameIndexerBase):
Member Author: The only _NDFrameIndexer method these use is _tuplify, which is why this PR takes that out and makes it a function instead of a method. Much easier to reason about these classes now

""" access scalars quickly """

def _convert_key(self, key, is_setter: bool = False):
@@ -2178,8 +2175,8 @@ def __setitem__(self, key, value):
key = com.apply_if_callable(key, self.obj)

if not isinstance(key, tuple):
key = self._tuplify(key)
if len(key) != self.obj.ndim:
key = _tuplify(self.ndim, key)
if len(key) != self.ndim:
raise ValueError("Not enough indexers for scalar access (setting)!")
key = list(self._convert_key(key, is_setter=True))
key.append(value)
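The scalar indexers built on this class are .at and .iat; a non-tuple key in the __setitem__ above is expanded via the module-level _tuplify defined further down. Typical usage:

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
print(df.at[0, "b"])   # label-based scalar read -> 3
df.iat[1, 0] = 20      # position-based scalar write via _ScalarAccessIndexer.__setitem__
print(df.iat[1, 0])    # 20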
@@ -2309,9 +2306,6 @@ class _iAtIndexer(_ScalarAccessIndexer):

_takeable = True

def _has_valid_setitem_indexer(self, indexer):
self._has_valid_positional_setitem_indexer(indexer)

def _convert_key(self, key, is_setter: bool = False):
""" require integer args (and convert to label arguments) """
for a, i in zip(self.obj.axes, key):
@@ -2320,6 +2314,25 @@ def _convert_key(self, key, is_setter: bool = False):
return key


def _tuplify(ndim: int, loc) -> tuple:
"""
Given an indexer for the first dimension, create an equivalent tuple
for indexing over all dimensions.

Parameters
----------
ndim : int
loc : object

Returns
-------
tuple
"""
tup = [slice(None, None) for _ in range(ndim)]
Contributor: can you add a doc-string

Member Author: sure

Member Author: docstrings added. will take another look at removing the discussed lines in the next pass

tup[0] = loc
return tuple(tup)
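As a module-level helper, _tuplify is now trivially testable on its own. A quick check of the contract implied by the body above (private helper, so subject to change):

assert _tuplify(2, 5) == (5, slice(None, None))
assert _tuplify(3, slice(1, 4)) == (
    slice(1, 4),
    slice(None, None),
    slice(None, None),
)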


def convert_to_index_sliceable(obj, key):
"""
if we are index sliceable, then return my slicer, otherwise return None
@@ -2469,25 +2482,6 @@ def need_slice(obj):
)


def maybe_droplevels(index, key):
# drop levels
original_index = index
if isinstance(key, tuple):
for _ in key:
try:
index = index.droplevel(0)
except ValueError:
# we have dropped too much, so back out
return original_index
else:
try:
index = index.droplevel(0)
except ValueError:
pass

return index


def _non_reducing_slice(slice_):
"""
Ensurse that a slice doesn't reduce to a Series or Scalar.