From 3e6cc341a39b161085d7b109f9d5541c12b543ce Mon Sep 17 00:00:00 2001 From: Stephen Lin Date: Tue, 22 Jan 2013 20:19:51 -0500 Subject: [PATCH] BUG: Mismatch between get and set behavior for slices of floating indices --- pandas/core/indexing.py | 44 ++++++++++++++++++++++---------------- pandas/tests/test_frame.py | 39 +++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 19 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b7c037ae65206..8f812252134a1 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -435,23 +435,27 @@ def _convert_to_indexer(self, obj, axis=0): if isinstance(obj, slice): ltype = labels.inferred_type - if ltype == 'floating': - int_slice = _is_int_slice(obj) - else: - # floats that are within tolerance of int used - int_slice = _is_index_slice(obj) + # in case of providing all floats, use label-based indexing + float_slice = (labels.inferred_type == 'floating' + and _is_float_slice(obj)) + + # floats that are within tolerance of int used as positions + int_slice = _is_index_slice(obj) null_slice = obj.start is None and obj.stop is None - # could have integers in the first level of the MultiIndex + + # could have integers in the first level of the MultiIndex, + # in which case we wouldn't want to do position-based slicing position_slice = (int_slice and not ltype == 'integer' - and not isinstance(labels, MultiIndex)) + and not isinstance(labels, MultiIndex) + and not float_slice) start, stop = obj.start, obj.stop # last ditch effort: if we are mixed and have integers try: - if 'mixed' in ltype and int_slice: + if position_slice and 'mixed' in ltype: if start is not None: i = labels.get_loc(start) if stop is not None: @@ -468,7 +472,7 @@ def _convert_to_indexer(self, obj, axis=0): indexer = labels.slice_indexer(start, stop, obj.step) except Exception: if _is_index_slice(obj): - if labels.inferred_type == 'integer': + if ltype == 'integer': raise indexer = obj else: @@ -539,34 +543,36 @@ def _get_slice_axis(self, slice_obj, axis=0): labels = obj._get_axis(axis) - int_slice = _is_index_slice(slice_obj) - - start = slice_obj.start - stop = slice_obj.stop + ltype = labels.inferred_type # in case of providing all floats, use label-based indexing float_slice = (labels.inferred_type == 'floating' and _is_float_slice(slice_obj)) + # floats that are within tolerance of int used as positions + int_slice = _is_index_slice(slice_obj) + null_slice = slice_obj.start is None and slice_obj.stop is None - # could have integers in the first level of the MultiIndex, in which - # case we wouldn't want to do position-based slicing + # could have integers in the first level of the MultiIndex, + # in which case we wouldn't want to do position-based slicing position_slice = (int_slice - and labels.inferred_type != 'integer' + and not ltype == 'integer' and not isinstance(labels, MultiIndex) and not float_slice) + start, stop = slice_obj.start, slice_obj.stop + # last ditch effort: if we are mixed and have integers try: - if 'mixed' in labels.inferred_type and int_slice: + if position_slice and 'mixed' in ltype: if start is not None: i = labels.get_loc(start) if stop is not None: j = labels.get_loc(stop) position_slice = False except KeyError: - if labels.inferred_type == 'mixed-integer-float': + if ltype == 'mixed-integer-float': raise if null_slice or position_slice: @@ -576,7 +582,7 @@ def _get_slice_axis(self, slice_obj, axis=0): indexer = labels.slice_indexer(start, stop, slice_obj.step) except Exception: if _is_index_slice(slice_obj): - if labels.inferred_type == 'integer': + if ltype == 'integer': raise indexer = slice_obj else: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 00c9aa3f02761..e8b33e51fbba7 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1183,6 +1183,45 @@ def test_getitem_setitem_float_labels(self): self.assertRaises(Exception, df.ix.__getitem__, slice(1, 2)) self.assertRaises(Exception, df.ix.__setitem__, slice(1, 2), 0) + # #2727 + index = Index([1.0, 2.5, 3.5, 4.5, 5.0]) + df = DataFrame(np.random.randn(5, 5), index=index) + + # positional slicing! + result = df.ix[1.0:5] + expected = df.reindex([2.5, 3.5, 4.5, 5.0]) + assert_frame_equal(result, expected) + self.assertEqual(len(result), 4) + + # positional again + result = df.ix[4:5] + expected = df.reindex([5.0]) + assert_frame_equal(result, expected) + self.assertEqual(len(result), 1) + + # label-based + result = df.ix[1.0:5.0] + expected = df.reindex([1.0, 2.5, 3.5, 4.5, 5.0]) + assert_frame_equal(result, expected) + self.assertEqual(len(result), 5) + + cp = df.copy() + # positional slicing! + cp.ix[1.0:5] = 0 + self.assert_((cp.ix[1.0:5] == 0).values.all()) + self.assert_((cp.ix[0:1] == df.ix[0:1]).values.all()) + + cp = df.copy() + # positional again + cp.ix[4:5] = 0 + self.assert_((cp.ix[4:5] == 0).values.all()) + self.assert_((cp.ix[0:4] == df.ix[0:4]).values.all()) + + cp = df.copy() + # label-based + cp.ix[1.0:5.0] = 0 + self.assert_((cp.ix[1.0:5.0] == 0).values.all()) + def test_setitem_single_column_mixed(self): df = DataFrame(randn(5, 3), index=['a', 'b', 'c', 'd', 'e'], columns=['foo', 'bar', 'baz'])