Skip to content

Commit b0c8110

Browse files
committed
Merge pull request #7149 from cpcloud/float64index-dup-fix-7143
BUG: allow dup indexing with Float64Index
2 parents 12735f9 + b05026b commit b0c8110

File tree

5 files changed

+92
-35
lines changed

5 files changed

+92
-35
lines changed

doc/source/v0.14.1.txt

+1
Original file line number | Diff line number | Diff line change
@@ -74,3 +74,4 @@ Bug Fixes
7474
- Bug in ``DataFrame.query()``/``eval`` where local string variables with the @
7575
sign were being treated as temporaries attempting to be deleted
7676
(:issue:`7300`).
77+
- Bug in ``Float64Index`` which didn't allow duplicates (:issue:`7149`).

pandas/core/frame.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1539,7 +1539,7 @@ def get_value(self, index, col, takeable=False):
15391539
value : scalar value
15401540
"""
15411541

1542-
if takeable is True:
1542+
if takeable:
15431543
series = self._iget_item_cache(col)
15441544
return series.values[index]
15451545

pandas/core/index.py

+26 -34
Original file line number | Diff line number | Diff line change
@@ -1713,52 +1713,43 @@ def slice_locs(self, start=None, end=None):
17131713
"""
17141714

17151715
is_unique = self.is_unique
1716-
if start is None:
1717-
start_slice = 0
1718-
else:
1719-
try:
1720-
start_slice = self.get_loc(start)
1721-
1722-
if not is_unique:
17231716

1724-
# get_loc will return a boolean array for non_uniques
1725-
# if we are not monotonic
1726-
if isinstance(start_slice, (ABCSeries, np.ndarray)):
1727-
raise KeyError("cannot peform a slice operation "
1728-
"on a non-unique non-monotonic index")
1729-
1730-
if isinstance(start_slice, slice):
1731-
start_slice = start_slice.start
1717+
def _get_slice(starting_value, offset, search_side, slice_property,
1718+
search_value):
1719+
if search_value is None:
1720+
return starting_value
17321721

1733-
except KeyError:
1734-
if self.is_monotonic:
1735-
start_slice = self.searchsorted(start, side='left')
1736-
else:
1737-
raise
1738-
1739-
if end is None:
1740-
end_slice = len(self)
1741-
else:
17421722
try:
1743-
end_slice = self.get_loc(end)
1723+
slc = self.get_loc(search_value)
17441724

17451725
if not is_unique:
17461726

17471727
# get_loc will return a boolean array for non_uniques
1748-
if isinstance(end_slice, np.ndarray):
1749-
raise KeyError("cannot perform a slice operation "
1728+
# if we are not monotonic
1729+
if isinstance(slc, np.ndarray):
1730+
raise KeyError("cannot peform a slice operation "
17501731
"on a non-unique non-monotonic index")
17511732

1752-
if isinstance(end_slice, slice):
1753-
end_slice = end_slice.stop
1733+
if isinstance(slc, slice):
1734+
slc = getattr(slc, slice_property)
17541735
else:
1755-
end_slice += 1
1736+
slc += offset
17561737

17571738
except KeyError:
17581739
if self.is_monotonic:
1759-
end_slice = self.searchsorted(end, side='right')
1740+
if not is_unique:
1741+
slc = search_value
1742+
else:
1743+
slc = self.searchsorted(search_value,
1744+
side=search_side)
17601745
else:
17611746
raise
1747+
return slc
1748+
1749+
start_slice = _get_slice(0, offset=0, search_side='left',
1750+
slice_property='start', search_value=start)
1751+
end_slice = _get_slice(len(self), offset=1, search_side='right',
1752+
slice_property='stop', search_value=end)
17621753

17631754
return start_slice, end_slice
17641755

@@ -1994,11 +1985,12 @@ def _convert_slice_indexer(self, key, typ=None):
19941985
""" convert a slice indexer, by definition these are labels
19951986
unless we are iloc """
19961987
if typ == 'iloc':
1997-
return super(Float64Index, self)._convert_slice_indexer(key, typ=typ)
1988+
return super(Float64Index, self)._convert_slice_indexer(key,
1989+
typ=typ)
19981990

19991991
# allow floats here
2000-
self._validate_slicer(
2001-
key, lambda v: v is None or is_integer(v) or is_float(v))
1992+
validator = lambda v: v is None or is_integer(v) or is_float(v)
1993+
self._validate_slicer(key, validator)
20021994

20031995
# translate to locations
20041996
return self.slice_indexer(key.start, key.stop, key.step)

pandas/index.pyx

+29
Original file line number | Diff line number | Diff line change
@@ -401,6 +401,35 @@ cdef class Float64Engine(IndexEngine):
401401
cdef _get_index_values(self):
402402
return algos.ensure_float64(self.vgetter())
403403

404+
cdef _maybe_get_bool_indexer(self, object val):
405+
cdef:
406+
ndarray[uint8_t] indexer
407+
ndarray[float64_t] values
408+
int count = 0
409+
Py_ssize_t i, n
410+
int last_true
411+
412+
values = self._get_index_values()
413+
n = len(values)
414+
415+
result = np.empty(n, dtype=bool)
416+
indexer = result.view(np.uint8)
417+
418+
for i in range(n):
419+
if values[i] == val:
420+
count += 1
421+
indexer[i] = 1
422+
last_true = i
423+
else:
424+
indexer[i] = 0
425+
426+
if count == 0:
427+
raise KeyError(val)
428+
if count == 1:
429+
return last_true
430+
431+
return result
432+
404433
def _call_monotonic(self, values):
405434
return algos.is_monotonic_float64(values)
406435

pandas/tests/test_frame.py

+35
Original file line number | Diff line number | Diff line change
@@ -1747,6 +1747,41 @@ def test_reversed_reindex_ffill_raises(self):
17471747
self.assertRaises(ValueError, df.reindex, dr[::-1], method='ffill')
17481748
self.assertRaises(ValueError, df.reindex, dr[::-1], method='bfill')
17491749

1750+
def test_getitem_ix_float_duplicates(self):
1751+
df = pd.DataFrame(np.random.randn(3, 3),
1752+
index=[0.1, 0.2, 0.2], columns=list('abc'))
1753+
expect = df.iloc[1:]
1754+
tm.assert_frame_equal(df.loc[0.2], expect)
1755+
tm.assert_frame_equal(df.ix[0.2], expect)
1756+
1757+
expect = df.iloc[1:, 0]
1758+
tm.assert_series_equal(df.loc[0.2, 'a'], expect)
1759+
1760+
df.index = [1, 0.2, 0.2]
1761+
expect = df.iloc[1:]
1762+
tm.assert_frame_equal(df.loc[0.2], expect)
1763+
tm.assert_frame_equal(df.ix[0.2], expect)
1764+
1765+
expect = df.iloc[1:, 0]
1766+
tm.assert_series_equal(df.loc[0.2, 'a'], expect)
1767+
1768+
df = pd.DataFrame(np.random.randn(4, 3),
1769+
index=[1, 0.2, 0.2, 1], columns=list('abc'))
1770+
expect = df.iloc[1:-1]
1771+
tm.assert_frame_equal(df.loc[0.2], expect)
1772+
tm.assert_frame_equal(df.ix[0.2], expect)
1773+
1774+
expect = df.iloc[1:-1, 0]
1775+
tm.assert_series_equal(df.loc[0.2, 'a'], expect)
1776+
1777+
df.index = [0.1, 0.2, 2, 0.2]
1778+
expect = df.iloc[[1, -1]]
1779+
tm.assert_frame_equal(df.loc[0.2], expect)
1780+
tm.assert_frame_equal(df.ix[0.2], expect)
1781+
1782+
expect = df.iloc[[1, -1], 0]
1783+
tm.assert_series_equal(df.loc[0.2, 'a'], expect)
1784+
17501785

17511786
_seriesd = tm.getSeriesData()
17521787
_tsd = tm.getTimeSeriesData()

0 commit comments

Comments
 (0)