Allow duplicate labels in Float64Index (GH 7149).

* doc/source/v0.14.1.txt: add the release note.
* pandas/core/frame.py: DataFrame.get_value tests ``takeable`` for truthiness
  instead of identity with True.
* pandas/core/index.py: fold the duplicated start/end lookups of
  Index.slice_locs into one nested helper, _get_slice, and reflow
  Float64Index._convert_slice_indexer.
* pandas/index.pyx: add Float64Engine._maybe_get_bool_indexer, which returns
  the single matching position, a boolean mask when several positions match,
  and raises KeyError when none do.
* pandas/tests/test_frame.py: add test_getitem_ix_float_duplicates.

NOTE(review): in _get_slice, when the label is missing and the index is
monotonic but not unique, the label itself is returned as the slice position.
Please confirm that this is intended for non-integer labels.

diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt
index c19a3951ac359..0d4dbae07413a 100644
--- a/doc/source/v0.14.1.txt
+++ b/doc/source/v0.14.1.txt
@@ -74,3 +74,4 @@ Bug Fixes
 - Bug in ``DataFrame.query()``/``eval`` where local string variables with the @
   sign were being treated as temporaries attempting to be deleted
   (:issue:`7300`).
+- Bug in ``Float64Index`` which didn't allow duplicates (:issue:`7149`).
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6d7566bfd84ae..fb39aced6ec1d 100755
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1539,7 +1539,7 @@ def get_value(self, index, col, takeable=False):
         value : scalar value
         """
 
-        if takeable is True:
+        if takeable:
             series = self._iget_item_cache(col)
             return series.values[index]
 
diff --git a/pandas/core/index.py b/pandas/core/index.py
index 6ac0b7004af6c..10e5558e12542 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -1713,52 +1713,43 @@ def slice_locs(self, start=None, end=None):
         """
 
         is_unique = self.is_unique
-        if start is None:
-            start_slice = 0
-        else:
-            try:
-                start_slice = self.get_loc(start)
-
-                if not is_unique:
-                    # get_loc will return a boolean array for non_uniques
-                    # if we are not monotonic
-                    if isinstance(start_slice, (ABCSeries, np.ndarray)):
-                        raise KeyError("cannot peform a slice operation "
-                                       "on a non-unique non-monotonic index")
-
-                if isinstance(start_slice, slice):
-                    start_slice = start_slice.start
+        def _get_slice(starting_value, offset, search_side, slice_property,
+                       search_value):
+            if search_value is None:
+                return starting_value
 
-            except KeyError:
-                if self.is_monotonic:
-                    start_slice = self.searchsorted(start, side='left')
-                else:
-                    raise
-
-        if end is None:
-            end_slice = len(self)
-        else:
             try:
-                end_slice = self.get_loc(end)
+                slc = self.get_loc(search_value)
 
                 if not is_unique:
 
                     # get_loc will return a boolean array for non_uniques
-                    if isinstance(end_slice, np.ndarray):
+                    # if we are not monotonic
+                    if isinstance(slc, np.ndarray):
                         raise KeyError("cannot perform a slice operation "
                                        "on a non-unique non-monotonic index")
 
-                if isinstance(end_slice, slice):
-                    end_slice = end_slice.stop
+                if isinstance(slc, slice):
+                    slc = getattr(slc, slice_property)
                 else:
-                    end_slice += 1
+                    slc += offset
 
             except KeyError:
                 if self.is_monotonic:
-                    end_slice = self.searchsorted(end, side='right')
+                    if not is_unique:
+                        slc = search_value
+                    else:
+                        slc = self.searchsorted(search_value,
+                                                side=search_side)
                 else:
                     raise
 
+            return slc
+
+        start_slice = _get_slice(0, offset=0, search_side='left',
+                                 slice_property='start', search_value=start)
+        end_slice = _get_slice(len(self), offset=1, search_side='right',
+                               slice_property='stop', search_value=end)
 
         return start_slice, end_slice
 
@@ -1994,11 +1985,12 @@ def _convert_slice_indexer(self, key, typ=None):
         """ convert a slice indexer, by definition these are labels
             unless we are iloc """
         if typ == 'iloc':
-            return super(Float64Index, self)._convert_slice_indexer(key, typ=typ)
+            return super(Float64Index, self)._convert_slice_indexer(key,
+                                                                    typ=typ)
 
         # allow floats here
-        self._validate_slicer(
-            key, lambda v: v is None or is_integer(v) or is_float(v))
+        validator = lambda v: v is None or is_integer(v) or is_float(v)
+        self._validate_slicer(key, validator)
 
         # translate to locations
         return self.slice_indexer(key.start, key.stop, key.step)
diff --git a/pandas/index.pyx b/pandas/index.pyx
index ae209b58136e1..4f8e780ded808 100644
--- a/pandas/index.pyx
+++ b/pandas/index.pyx
@@ -401,6 +401,35 @@ cdef class Float64Engine(IndexEngine):
     cdef _get_index_values(self):
         return algos.ensure_float64(self.vgetter())
 
+    cdef _maybe_get_bool_indexer(self, object val):
+        cdef:
+            ndarray[uint8_t] indexer
+            ndarray[float64_t] values
+            int count = 0
+            Py_ssize_t i, n
+            Py_ssize_t last_true
+
+        values = self._get_index_values()
+        n = len(values)
+
+        result = np.empty(n, dtype=bool)
+        indexer = result.view(np.uint8)
+
+        for i in range(n):
+            if values[i] == val:
+                count += 1
+                indexer[i] = 1
+                last_true = i
+            else:
+                indexer[i] = 0
+
+        if count == 0:
+            raise KeyError(val)
+        if count == 1:
+            return last_true
+
+        return result
+
     def _call_monotonic(self, values):
         return algos.is_monotonic_float64(values)
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 9b727d5752097..7e87c07911353 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -1747,6 +1747,41 @@ def test_reversed_reindex_ffill_raises(self):
         self.assertRaises(ValueError, df.reindex, dr[::-1], method='ffill')
         self.assertRaises(ValueError, df.reindex, dr[::-1], method='bfill')
 
+    def test_getitem_ix_float_duplicates(self):
+        df = pd.DataFrame(np.random.randn(3, 3),
+                          index=[0.1, 0.2, 0.2], columns=list('abc'))
+        expect = df.iloc[1:]
+        tm.assert_frame_equal(df.loc[0.2], expect)
+        tm.assert_frame_equal(df.ix[0.2], expect)
+
+        expect = df.iloc[1:, 0]
+        tm.assert_series_equal(df.loc[0.2, 'a'], expect)
+
+        df.index = [1, 0.2, 0.2]
+        expect = df.iloc[1:]
+        tm.assert_frame_equal(df.loc[0.2], expect)
+        tm.assert_frame_equal(df.ix[0.2], expect)
+
+        expect = df.iloc[1:, 0]
+        tm.assert_series_equal(df.loc[0.2, 'a'], expect)
+
+        df = pd.DataFrame(np.random.randn(4, 3),
+                          index=[1, 0.2, 0.2, 1], columns=list('abc'))
+        expect = df.iloc[1:-1]
+        tm.assert_frame_equal(df.loc[0.2], expect)
+        tm.assert_frame_equal(df.ix[0.2], expect)
+
+        expect = df.iloc[1:-1, 0]
+        tm.assert_series_equal(df.loc[0.2, 'a'], expect)
+
+        df.index = [0.1, 0.2, 2, 0.2]
+        expect = df.iloc[[1, -1]]
+        tm.assert_frame_equal(df.loc[0.2], expect)
+        tm.assert_frame_equal(df.ix[0.2], expect)
+
+        expect = df.iloc[[1, -1], 0]
+        tm.assert_series_equal(df.loc[0.2, 'a'], expect)
+
 _seriesd = tm.getSeriesData()
 _tsd = tm.getTimeSeriesData()
 