Skip to content

BUG: indexing changes to .loc for compat to .ix for several situations #15120

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ Other API Changes
- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
- ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`)

- ``.loc`` is now compatible with ``.ix`` in accepting iterators and namedtuples (:issue:`15120`)
- ``pd.read_csv()`` will now issue a ``ParserWarning`` whenever there are conflicting values provided by the ``dialect`` parameter and the user (:issue:`14898`)
- ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than one byte (:issue:`11592`)
- ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`)
Expand Down Expand Up @@ -318,7 +318,7 @@ Bug Fixes
- Bug in ``Series`` construction with a datetimetz (:issue:`14928`)

- Bug in compat for passing long integers to ``Timestamp.replace`` (:issue:`15030`)

- Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`)



Expand Down
80 changes: 73 additions & 7 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
is_categorical_dtype,
is_list_like,
is_sequence,
is_iterator,
is_scalar,
is_sparse,
_is_unorderable_exception,
Expand Down Expand Up @@ -1300,17 +1301,24 @@ class _LocationIndexer(_NDFrameIndexer):
_exception = Exception

def __getitem__(self, key):
    """Dispatch ``indexer[key]`` to the scalar, tuple or single-axis path.

    Callables found in ``key`` are resolved against ``self.obj`` before
    the lookup. Only an exact ``tuple`` is treated as a per-axis key;
    tuple subclasses (e.g. namedtuples) are left intact and passed to
    ``_getitem_axis`` on axis 0 as a single key.
    """
    if type(key) is not tuple:
        # a scalar callable may itself return a tuple, so resolve it
        # before deciding which path to take
        key = com._apply_if_callable(key, self.obj)

    if type(key) is tuple:
        key = tuple(com._apply_if_callable(x, self.obj) for x in key)
        try:
            if self._is_scalar_access(key):
                return self._getitem_scalar(key)
        except (KeyError, IndexError):
            # the fast path could not resolve the key; fall through to
            # the general tuple lookup
            pass
        return self._getitem_tuple(key)

    return self._getitem_axis(key, axis=0)

def _is_scalar_access(self, key):
    """Hook for subclasses: report whether ``key`` selects a single cell.

    The base implementation always raises ``NotImplementedError``.
    """
    raise NotImplementedError()

def _getitem_scalar(self, key):
    """Hook for subclasses: return the single value addressed by ``key``.

    The base implementation always raises ``NotImplementedError``.
    """
    raise NotImplementedError()

def _getitem_axis(self, key, axis=0):
    """Hook for subclasses: look ``key`` up along a single ``axis``.

    The base implementation always raises ``NotImplementedError``.
    """
    raise NotImplementedError()

Expand Down Expand Up @@ -1389,7 +1397,8 @@ def _has_valid_type(self, key, axis):
return True

# TODO: don't check the entire key unless necessary
if len(key) and np.all(ax.get_indexer_for(key) < 0):
if (not is_iterator(key) and len(key) and
np.all(ax.get_indexer_for(key) < 0)):

raise KeyError("None of [%s] are in the [%s]" %
(key, self.obj._get_axis_name(axis)))
Expand Down Expand Up @@ -1420,6 +1429,36 @@ def error():

return True

def _is_scalar_access(self, key):
    """Return True when ``key`` addresses exactly one cell by label.

    This gates a shortcut that gives label-based indexing the same
    access as ``.at``. The shortcut
    a) avoids going through sections, which minimizes dtype changes, and
    b) provides a performant path.

    The key qualifies only if it is a sized, non-scalar container with
    one entry per dimension, every entry is a scalar, and no axis is a
    ``MultiIndex`` or contains duplicate labels.
    """
    # a lone label such as 'ab' has a length but is a single label,
    # never a per-axis key
    if is_scalar(key) or not hasattr(key, '__len__'):
        return False

    if len(key) != self.ndim:
        return False

    for i, k in enumerate(key):
        if not is_scalar(k):
            return False

        ax = self.obj.axes[i]
        if isinstance(ax, MultiIndex):
            return False

        if not ax.is_unique:
            return False

    return True

def _getitem_scalar(self, key):
    """Fast path to a single value: forward ``key`` to ``obj.get_value``.

    The entries of ``key`` are unpacked as positional arguments. Any
    exception raised by ``get_value`` propagates to the caller.
    """
    return self.obj.get_value(*key)

def _get_partial_string_timestamp_match_key(self, key, labels):
"""Translate any partial string timestamp matches in key, returning the
new key (GH 10331)"""
Expand Down Expand Up @@ -1536,6 +1575,33 @@ def _has_valid_type(self, key, axis):
def _has_valid_setitem_indexer(self, indexer):
    """Validate ``indexer`` for setting by delegating to the positional check.

    The result of ``_has_valid_positional_setitem_indexer`` is not
    returned; this method always returns ``None``.
    """
    self._has_valid_positional_setitem_indexer(indexer)

def _is_scalar_access(self, key):
    """Return True when ``key`` addresses exactly one cell by position.

    This gates a shortcut that gives positional indexing the same
    access as ``.iat``. The shortcut
    a) avoids going through sections, which minimizes dtype changes, and
    b) provides a performant path.

    The key qualifies only if it is a sized container with one entry
    per dimension, every entry is an integer, and every axis has unique
    labels.
    """
    if not hasattr(key, '__len__'):
        return False

    if len(key) != self.ndim:
        return False

    for i, k in enumerate(key):
        if not is_integer(k):
            return False

        ax = self.obj.axes[i]
        if not ax.is_unique:
            return False

    return True

def _getitem_scalar(self, key):
    """Fast path to a single value by position via ``obj.get_value``.

    The entries of ``key`` are unpacked as positional arguments and
    ``takeable=True`` is passed alongside them. Any exception raised by
    ``get_value`` propagates to the caller.
    """
    return self.obj.get_value(*key, takeable=True)

def _is_valid_integer(self, key, axis):
# return a boolean if we have a valid integer indexer

Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/frame/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ def test_getitem_iterator(self):
expected = self.frame.ix[:, ['A', 'B', 'C']]
assert_frame_equal(result, expected)

idx = iter(['A', 'B', 'C'])
result = self.frame.loc[:, idx]
expected = self.frame.loc[:, ['A', 'B', 'C']]
assert_frame_equal(result, expected)

def test_getitem_list(self):
self.frame.columns.name = 'foo'

Expand Down Expand Up @@ -1667,6 +1672,24 @@ def test_single_element_ix_dont_upcast(self):
result = self.frame.ix[self.frame.index[5], 'E']
self.assertTrue(is_integer(result))

result = self.frame.loc[self.frame.index[5], 'E']
self.assertTrue(is_integer(result))

# GH 11617
df = pd.DataFrame(dict(a=[1.23]))
df["b"] = 666

result = df.ix[0, "b"]
self.assertTrue(is_integer(result))
result = df.loc[0, "b"]
self.assertTrue(is_integer(result))

expected = Series([666], [0], name='b')
result = df.ix[[0], "b"]
assert_series_equal(result, expected)
result = df.loc[[0], "b"]
assert_series_equal(result, expected)

def test_irow(self):
df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2))

Expand Down Expand Up @@ -2159,9 +2182,13 @@ def test_index_namedtuple(self):
index = Index([idx1, idx2],
name="composite_index", tupleize_cols=False)
df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"])

result = df.ix[IndexType("foo", "bar")]["A"]
self.assertEqual(result, 1)

result = df.loc[IndexType("foo", "bar")]["A"]
self.assertEqual(result, 1)

def test_boolean_indexing(self):
idx = lrange(3)
cols = ['A', 'B', 'C']
Expand Down