Merge pull request #7149 from cpcloud/float64index-dup-fix-7143

cpcloud · cpcloud · commit b0c811085e24 · 2014-06-01T18:08:10.000-04:00
BUG: allow dup indexing with Float64Index
diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt
@@ -74,3 +74,4 @@ Bug Fixes
 - Bug in ``DataFrame.query()``/``eval`` where local string variables with the @
   sign were being treated as temporaries attempting to be deleted
   (:issue:`7300`).
+- Bug in ``Float64Index`` where indexing with duplicate values was not allowed (:issue:`7149`).
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1539,7 +1539,7 @@ def get_value(self, index, col, takeable=False):
         value : scalar value
         """
 
-        if takeable is True:
+        if takeable:
             series = self._iget_item_cache(col)
             return series.values[index]
 
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -1713,52 +1713,43 @@ def slice_locs(self, start=None, end=None):
         """
 
         is_unique = self.is_unique
-        if start is None:
-            start_slice = 0
-        else:
-            try:
-                start_slice = self.get_loc(start)
-
-                if not is_unique:
 
-                    # get_loc will return a boolean array for non_uniques
-                    # if we are not monotonic
-                    if isinstance(start_slice, (ABCSeries, np.ndarray)):
-                        raise KeyError("cannot peform a slice operation "
-                                       "on a non-unique non-monotonic index")
-
-                if isinstance(start_slice, slice):
-                    start_slice = start_slice.start
+        def _get_slice(starting_value, offset, search_side, slice_property,
+                       search_value):
+            if search_value is None:
+                return starting_value
 
-            except KeyError:
-                if self.is_monotonic:
-                    start_slice = self.searchsorted(start, side='left')
-                else:
-                    raise
-
-        if end is None:
-            end_slice = len(self)
-        else:
             try:
-                end_slice = self.get_loc(end)
+                slc = self.get_loc(search_value)
 
                 if not is_unique:
 
                     # get_loc will return a boolean array for non_uniques
-                    if isinstance(end_slice, np.ndarray):
-                        raise KeyError("cannot perform a slice operation "
+                    # if we are not monotonic
+                    if isinstance(slc, np.ndarray):
+                        raise KeyError("cannot perform a slice operation "
                                        "on a non-unique non-monotonic index")
 
-                if isinstance(end_slice, slice):
-                    end_slice = end_slice.stop
+                if isinstance(slc, slice):
+                    slc = getattr(slc, slice_property)
                 else:
-                    end_slice += 1
+                    slc += offset
 
             except KeyError:
                 if self.is_monotonic:
-                    end_slice = self.searchsorted(end, side='right')
+                    if not is_unique:
+                        slc = search_value
+                    else:
+                        slc = self.searchsorted(search_value,
+                                                side=search_side)
                 else:
                     raise
+            return slc
+
+        start_slice = _get_slice(0, offset=0, search_side='left',
+                                 slice_property='start', search_value=start)
+        end_slice = _get_slice(len(self), offset=1, search_side='right',
+                               slice_property='stop', search_value=end)
 
         return start_slice, end_slice
 
@@ -1994,11 +1985,12 @@ def _convert_slice_indexer(self, key, typ=None):
         """ convert a slice indexer, by definition these are labels
             unless we are iloc """
         if typ == 'iloc':
-            return super(Float64Index, self)._convert_slice_indexer(key, typ=typ)
+            return super(Float64Index, self)._convert_slice_indexer(key,
+                                                                    typ=typ)
 
         # allow floats here
-        self._validate_slicer(
-            key, lambda v: v is None or is_integer(v) or is_float(v))
+        validator = lambda v: v is None or is_integer(v) or is_float(v)
+        self._validate_slicer(key, validator)
 
         # translate to locations
         return self.slice_indexer(key.start, key.stop, key.step)
diff --git a/pandas/index.pyx b/pandas/index.pyx
@@ -401,6 +401,35 @@ cdef class Float64Engine(IndexEngine):
     cdef _get_index_values(self):
         return algos.ensure_float64(self.vgetter())
 
+    cdef _maybe_get_bool_indexer(self, object val):
+        cdef:
+            ndarray[uint8_t] indexer
+            ndarray[float64_t] values
+            int count = 0
+            Py_ssize_t i, n
+            int last_true
+
+        values = self._get_index_values()
+        n = len(values)
+
+        result = np.empty(n, dtype=bool)
+        indexer = result.view(np.uint8)
+
+        for i in range(n):
+            if values[i] == val:
+                count += 1
+                indexer[i] = 1
+                last_true = i
+            else:
+                indexer[i] = 0
+
+        if count == 0:
+            raise KeyError(val)
+        if count == 1:
+            return last_true
+
+        return result
+
     def _call_monotonic(self, values):
         return algos.is_monotonic_float64(values)
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -1747,6 +1747,41 @@ def test_reversed_reindex_ffill_raises(self):
         self.assertRaises(ValueError, df.reindex, dr[::-1], method='ffill')
         self.assertRaises(ValueError, df.reindex, dr[::-1], method='bfill')
 
+    def test_getitem_ix_float_duplicates(self):
+        df = pd.DataFrame(np.random.randn(3, 3),
+                          index=[0.1, 0.2, 0.2], columns=list('abc'))
+        expect = df.iloc[1:]
+        tm.assert_frame_equal(df.loc[0.2], expect)
+        tm.assert_frame_equal(df.ix[0.2], expect)
+
+        expect = df.iloc[1:, 0]
+        tm.assert_series_equal(df.loc[0.2, 'a'], expect)
+
+        df.index = [1, 0.2, 0.2]
+        expect = df.iloc[1:]
+        tm.assert_frame_equal(df.loc[0.2], expect)
+        tm.assert_frame_equal(df.ix[0.2], expect)
+
+        expect = df.iloc[1:, 0]
+        tm.assert_series_equal(df.loc[0.2, 'a'], expect)
+
+        df = pd.DataFrame(np.random.randn(4, 3),
+                          index=[1, 0.2, 0.2, 1], columns=list('abc'))
+        expect = df.iloc[1:-1]
+        tm.assert_frame_equal(df.loc[0.2], expect)
+        tm.assert_frame_equal(df.ix[0.2], expect)
+
+        expect = df.iloc[1:-1, 0]
+        tm.assert_series_equal(df.loc[0.2, 'a'], expect)
+
+        df.index = [0.1, 0.2, 2, 0.2]
+        expect = df.iloc[[1, -1]]
+        tm.assert_frame_equal(df.loc[0.2], expect)
+        tm.assert_frame_equal(df.ix[0.2], expect)
+
+        expect = df.iloc[[1, -1], 0]
+        tm.assert_series_equal(df.loc[0.2, 'a'], expect)
+
 
 _seriesd = tm.getSeriesData()
 _tsd = tm.getTimeSeriesData()