Skip to content

Commit f42e960

Browse files
committed
BUG: indexing changes to .loc for compat to .ix for several situations
handle iterators; handle NamedTuple; .loc returns scalar selection dtypes correctly; closes pandas-dev#11617, xref pandas-dev#15113
1 parent 0fe491d commit f42e960

File tree

3 files changed

+101
-8
lines changed

3 files changed

+101
-8
lines changed

doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ Other API Changes
244244
- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
245245
- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
246246
- ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`)
247-
247+
- ``.loc`` has compat with ``.ix`` for accepting iterators and NamedTuples (:issue:`15120`)
248248
- ``pd.read_csv()`` will now issue a ``ParserWarning`` whenever there are conflicting values provided by the ``dialect`` parameter and the user (:issue:`14898`)
249249
- ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than one byte (:issue:`11592`)
250250
- ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`)

pandas/core/indexing.py

+73-7
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
is_categorical_dtype,
1010
is_list_like,
1111
is_sequence,
12+
is_iterator,
1213
is_scalar,
1314
is_sparse,
1415
_is_unorderable_exception,
@@ -1300,17 +1301,24 @@ class _LocationIndexer(_NDFrameIndexer):
13001301
_exception = Exception
13011302

13021303
def __getitem__(self, key):
1303-
if isinstance(key, tuple):
1304-
key = tuple(com._apply_if_callable(x, self.obj) for x in key)
1305-
else:
1306-
# scalar callable may return tuple
1307-
key = com._apply_if_callable(key, self.obj)
1308-
13091304
if type(key) is tuple:
1305+
key = tuple(com._apply_if_callable(x, self.obj) for x in key)
1306+
try:
1307+
if self._is_scalar_access(key):
1308+
return self._getitem_scalar(key)
1309+
except (KeyError, IndexError):
1310+
pass
13101311
return self._getitem_tuple(key)
13111312
else:
1313+
key = com._apply_if_callable(key, self.obj)
13121314
return self._getitem_axis(key, axis=0)
13131315

1316+
def _is_scalar_access(self, key):
1317+
raise NotImplementedError()
1318+
1319+
def _getitem_scalar(self, key):
1320+
raise NotImplementedError()
1321+
13141322
def _getitem_axis(self, key, axis=0):
13151323
raise NotImplementedError()
13161324

@@ -1389,7 +1397,8 @@ def _has_valid_type(self, key, axis):
13891397
return True
13901398

13911399
# TODO: don't check the entire key unless necessary
1392-
if len(key) and np.all(ax.get_indexer_for(key) < 0):
1400+
if (not is_iterator(key) and len(key) and
1401+
np.all(ax.get_indexer_for(key) < 0)):
13931402

13941403
raise KeyError("None of [%s] are in the [%s]" %
13951404
(key, self.obj._get_axis_name(axis)))
@@ -1420,6 +1429,36 @@ def error():
14201429

14211430
return True
14221431

1432+
def _is_scalar_access(self, key):
1433+
# this is a shortcut accessor to both .loc and .iloc
1434+
# that provide the equivalent access of .at and .iat
1435+
# a) avoid getting things via sections (to minimize dtype changes)
1436+
# b) provide a performant path
1437+
if not hasattr(key, '__len__'):
1438+
return False
1439+
1440+
if len(key) != self.ndim:
1441+
return False
1442+
1443+
for i, k in enumerate(key):
1444+
if not is_scalar(k):
1445+
return False
1446+
1447+
ax = self.obj.axes[i]
1448+
if isinstance(ax, MultiIndex):
1449+
return False
1450+
1451+
if not ax.is_unique:
1452+
return False
1453+
1454+
return True
1455+
1456+
def _getitem_scalar(self, key):
1457+
# a fast-path to scalar access
1458+
# if not, raise
1459+
values = self.obj.get_value(*key)
1460+
return values
1461+
14231462
def _get_partial_string_timestamp_match_key(self, key, labels):
14241463
"""Translate any partial string timestamp matches in key, returning the
14251464
new key (GH 10331)"""
@@ -1536,6 +1575,33 @@ def _has_valid_type(self, key, axis):
15361575
def _has_valid_setitem_indexer(self, indexer):
15371576
self._has_valid_positional_setitem_indexer(indexer)
15381577

1578+
def _is_scalar_access(self, key):
1579+
# this is a shortcut accessor to both .loc and .iloc
1580+
# that provide the equivalent access of .at and .iat
1581+
# a) avoid getting things via sections (to minimize dtype changes)
1582+
# b) provide a performant path
1583+
if not hasattr(key, '__len__'):
1584+
return False
1585+
1586+
if len(key) != self.ndim:
1587+
return False
1588+
1589+
for i, k in enumerate(key):
1590+
if not is_integer(k):
1591+
return False
1592+
1593+
ax = self.obj.axes[i]
1594+
if not ax.is_unique:
1595+
return False
1596+
1597+
return True
1598+
1599+
def _getitem_scalar(self, key):
1600+
# a fast-path to scalar access
1601+
# if not, raise
1602+
values = self.obj.get_value(*key, takeable=True)
1603+
return values
1604+
15391605
def _is_valid_integer(self, key, axis):
15401606
# return a boolean if we have a valid integer indexer
15411607

pandas/tests/frame/test_indexing.py

+27
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,11 @@ def test_getitem_iterator(self):
9393
expected = self.frame.ix[:, ['A', 'B', 'C']]
9494
assert_frame_equal(result, expected)
9595

96+
idx = iter(['A', 'B', 'C'])
97+
result = self.frame.loc[:, idx]
98+
expected = self.frame.loc[:, ['A', 'B', 'C']]
99+
assert_frame_equal(result, expected)
100+
96101
def test_getitem_list(self):
97102
self.frame.columns.name = 'foo'
98103

@@ -1667,6 +1672,24 @@ def test_single_element_ix_dont_upcast(self):
16671672
result = self.frame.ix[self.frame.index[5], 'E']
16681673
self.assertTrue(is_integer(result))
16691674

1675+
result = self.frame.loc[self.frame.index[5], 'E']
1676+
self.assertTrue(is_integer(result))
1677+
1678+
# GH 11617
1679+
df = pd.DataFrame(dict(a=[1.23]))
1680+
df["b"] = 666
1681+
1682+
result = df.ix[0, "b"]
1683+
self.assertTrue(is_integer(result))
1684+
result = df.loc[0, "b"]
1685+
self.assertTrue(is_integer(result))
1686+
1687+
expected = Series([666], [0], name='b')
1688+
result = df.ix[[0], "b"]
1689+
assert_series_equal(result, expected)
1690+
result = df.loc[[0], "b"]
1691+
assert_series_equal(result, expected)
1692+
16701693
def test_irow(self):
16711694
df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2))
16721695

@@ -2159,9 +2182,13 @@ def test_index_namedtuple(self):
21592182
index = Index([idx1, idx2],
21602183
name="composite_index", tupleize_cols=False)
21612184
df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"])
2185+
21622186
result = df.ix[IndexType("foo", "bar")]["A"]
21632187
self.assertEqual(result, 1)
21642188

2189+
result = df.loc[IndexType("foo", "bar")]["A"]
2190+
self.assertEqual(result, 1)
2191+
21652192
def test_boolean_indexing(self):
21662193
idx = lrange(3)
21672194
cols = ['A', 'B', 'C']

0 commit comments

Comments
 (0)