From 801c8d96b3d0744608dde11067a08d3c6b939d6e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 12 Jan 2017 14:14:34 -0500 Subject: [PATCH] BUG: indexing changes to .loc for compat to .ix for several situations handle iterator handle NamedTuple .loc returns scalar selection dtypes correctly, closes #11617 xref #15113 --- doc/source/whatsnew/v0.20.0.txt | 4 +- pandas/core/indexing.py | 80 ++++++++++++++++++++++++++--- pandas/tests/frame/test_indexing.py | 27 ++++++++++ 3 files changed, 102 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index c82dc370e3e71..b1f85c68153cf 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -244,7 +244,7 @@ Other API Changes - ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`) - ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`) - ``DataFrame.applymap()`` with an empty ``DataFrame`` will return a copy of the empty ``DataFrame`` instead of a ``Series`` (:issue:`8222`) - +- ``.loc`` has compat with ``.ix`` for accepting iterators, and NamedTuples (:issue:`15120`) - ``pd.read_csv()`` will now issue a ``ParserWarning`` whenever there are conflicting values provided by the ``dialect`` parameter and the user (:issue:`14898`) - ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than than one byte (:issue:`11592`) - ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`) @@ -318,7 +318,7 @@ Bug Fixes - Bug in ``Series`` construction with a datetimetz (:issue:`14928`) - Bug in compat for passing long integers to ``Timestamp.replace`` (:issue:`15030`) - +- Bug in ``.loc`` that would not return the correct dtype for scalar access for a DataFrame (:issue:`11617`) diff --git 
a/pandas/core/indexing.py b/pandas/core/indexing.py index 9fa5b67083b2d..6970d1891ee63 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -9,6 +9,7 @@ is_categorical_dtype, is_list_like, is_sequence, + is_iterator, is_scalar, is_sparse, _is_unorderable_exception, @@ -1300,17 +1301,24 @@ class _LocationIndexer(_NDFrameIndexer): _exception = Exception def __getitem__(self, key): - if isinstance(key, tuple): - key = tuple(com._apply_if_callable(x, self.obj) for x in key) - else: - # scalar callable may return tuple - key = com._apply_if_callable(key, self.obj) - if type(key) is tuple: + key = tuple(com._apply_if_callable(x, self.obj) for x in key) + try: + if self._is_scalar_access(key): + return self._getitem_scalar(key) + except (KeyError, IndexError): + pass return self._getitem_tuple(key) else: + key = com._apply_if_callable(key, self.obj) return self._getitem_axis(key, axis=0) + def _is_scalar_access(self, key): + raise NotImplementedError() + + def _getitem_scalar(self, key): + raise NotImplementedError() + def _getitem_axis(self, key, axis=0): raise NotImplementedError() @@ -1389,7 +1397,8 @@ def _has_valid_type(self, key, axis): return True # TODO: don't check the entire key unless necessary - if len(key) and np.all(ax.get_indexer_for(key) < 0): + if (not is_iterator(key) and len(key) and + np.all(ax.get_indexer_for(key) < 0)): raise KeyError("None of [%s] are in the [%s]" % (key, self.obj._get_axis_name(axis))) @@ -1420,6 +1429,36 @@ def error(): return True + def _is_scalar_access(self, key): + # this is a shortcut accessor to both .loc and .iloc + # that provide the equivalent access of .at and .iat + # a) avoid getting things via sections and (to minimize dtype changes) + # b) provide a performant path + if not hasattr(key, '__len__'): + return False + + if len(key) != self.ndim: + return False + + for i, k in enumerate(key): + if not is_scalar(k): + return False + + ax = self.obj.axes[i] + if isinstance(ax, MultiIndex): + return False + 
+ if not ax.is_unique: + return False + + return True + + def _getitem_scalar(self, key): + # a fast-path to scalar access + # if not, raise + values = self.obj.get_value(*key) + return values + def _get_partial_string_timestamp_match_key(self, key, labels): """Translate any partial string timestamp matches in key, returning the new key (GH 10331)""" @@ -1536,6 +1575,33 @@ def _has_valid_type(self, key, axis): def _has_valid_setitem_indexer(self, indexer): self._has_valid_positional_setitem_indexer(indexer) + def _is_scalar_access(self, key): + # this is a shortcut accessor to both .loc and .iloc + # that provide the equivalent access of .at and .iat + # a) avoid getting things via sections and (to minimize dtype changes) + # b) provide a performant path + if not hasattr(key, '__len__'): + return False + + if len(key) != self.ndim: + return False + + for i, k in enumerate(key): + if not is_integer(k): + return False + + ax = self.obj.axes[i] + if not ax.is_unique: + return False + + return True + + def _getitem_scalar(self, key): + # a fast-path to scalar access + # if not, raise + values = self.obj.get_value(*key, takeable=True) + return values + def _is_valid_integer(self, key, axis): # return a boolean if we have a valid integer indexer diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 720dcdd62dd89..abe40f7be1d90 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -93,6 +93,11 @@ def test_getitem_iterator(self): expected = self.frame.ix[:, ['A', 'B', 'C']] assert_frame_equal(result, expected) + idx = iter(['A', 'B', 'C']) + result = self.frame.loc[:, idx] + expected = self.frame.loc[:, ['A', 'B', 'C']] + assert_frame_equal(result, expected) + def test_getitem_list(self): self.frame.columns.name = 'foo' @@ -1667,6 +1672,24 @@ def test_single_element_ix_dont_upcast(self): result = self.frame.ix[self.frame.index[5], 'E'] self.assertTrue(is_integer(result)) + result = 
self.frame.loc[self.frame.index[5], 'E'] + self.assertTrue(is_integer(result)) + + # GH 11617 + df = pd.DataFrame(dict(a=[1.23])) + df["b"] = 666 + + result = df.ix[0, "b"] + self.assertTrue(is_integer(result)) + result = df.loc[0, "b"] + self.assertTrue(is_integer(result)) + + expected = Series([666], [0], name='b') + result = df.ix[[0], "b"] + assert_series_equal(result, expected) + result = df.loc[[0], "b"] + assert_series_equal(result, expected) + def test_irow(self): df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2)) @@ -2159,9 +2182,13 @@ def test_index_namedtuple(self): index = Index([idx1, idx2], name="composite_index", tupleize_cols=False) df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"]) + result = df.ix[IndexType("foo", "bar")]["A"] self.assertEqual(result, 1) + result = df.loc[IndexType("foo", "bar")]["A"] + self.assertEqual(result, 1) + def test_boolean_indexing(self): idx = lrange(3) cols = ['A', 'B', 'C']