BUG: allow dup indexing with Float64Index #7149

Merged: 1 commit merged on Jun 1, 2014
1 change: 1 addition & 0 deletions doc/source/v0.14.1.txt
@@ -74,3 +74,4 @@ Bug Fixes
 - Bug in ``DataFrame.query()``/``eval`` where local string variables with the @
   sign were being treated as temporaries attempting to be deleted
   (:issue:`7300`).
+- Bug in ``Float64Index`` which didn't allow duplicates (:issue:`7149`).
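
Not part of the diff: a minimal illustration of the behavior this changelog entry describes, assuming pandas with this fix applied (the frame below is made up for the example and mirrors the new test).

    import numpy as np
    import pandas as pd

    # a Float64Index containing a duplicated label
    df = pd.DataFrame(np.random.randn(3, 3),
                      index=[0.1, 0.2, 0.2], columns=list('abc'))

    # previously this raised; with duplicates allowed it returns
    # both rows labelled 0.2 (equivalent to df.iloc[1:])
    df.loc[0.2]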
2 changes: 1 addition & 1 deletion pandas/core/frame.py
@@ -1539,7 +1539,7 @@ def get_value(self, index, col, takeable=False):
         value : scalar value
         """

-        if takeable is True:
+        if takeable:
             series = self._iget_item_cache(col)
             return series.values[index]

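Not part of the diff: a quick sketch of the branch being simplified here; with takeable=True, get_value treats both arguments as positions rather than labels (the example frame is hypothetical).

    import pandas as pd

    df = pd.DataFrame({'a': [1.0, 2.0], 'b': [3.0, 4.0]})

    # takeable=True: row and column are positional, so this reads
    # row position 1 of column position 0 ('a') and returns 2.0
    df.get_value(1, 0, takeable=True)
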
60 changes: 26 additions & 34 deletions pandas/core/index.py
@@ -1713,52 +1713,43 @@ def slice_locs(self, start=None, end=None):
         """

         is_unique = self.is_unique
-        if start is None:
-            start_slice = 0
-        else:
-            try:
-                start_slice = self.get_loc(start)
-
-                if not is_unique:
-
-                    # get_loc will return a boolean array for non_uniques
-                    # if we are not monotonic
-                    if isinstance(start_slice, (ABCSeries, np.ndarray)):
-                        raise KeyError("cannot peform a slice operation "
-                                       "on a non-unique non-monotonic index")
-
-                if isinstance(start_slice, slice):
-                    start_slice = start_slice.start
-
-            except KeyError:
-                if self.is_monotonic:
-                    start_slice = self.searchsorted(start, side='left')
-                else:
-                    raise
-
-        if end is None:
-            end_slice = len(self)
-        else:
-            try:
-                end_slice = self.get_loc(end)
-
-                if not is_unique:
-
-                    # get_loc will return a boolean array for non_uniques
-                    if isinstance(end_slice, np.ndarray):
-                        raise KeyError("cannot perform a slice operation "
-                                       "on a non-unique non-monotonic index")
-
-                if isinstance(end_slice, slice):
-                    end_slice = end_slice.stop
-                else:
-                    end_slice += 1
-
-            except KeyError:
-                if self.is_monotonic:
-                    end_slice = self.searchsorted(end, side='right')
-                else:
-                    raise
+
+        def _get_slice(starting_value, offset, search_side, slice_property,
+                       search_value):
+            if search_value is None:
+                return starting_value
+
+            try:
+                slc = self.get_loc(search_value)
+
+                if not is_unique:
+
+                    # get_loc will return a boolean array for non_uniques
+                    # if we are not monotonic
+                    if isinstance(slc, np.ndarray):
+                        raise KeyError("cannot peform a slice operation "
+                                       "on a non-unique non-monotonic index")
+
+                if isinstance(slc, slice):
+                    slc = getattr(slc, slice_property)
+                else:
+                    slc += offset
+
+            except KeyError:
+                if self.is_monotonic:
+                    if not is_unique:
+                        slc = search_value
+                    else:
+                        slc = self.searchsorted(search_value,
+                                                side=search_side)
+                else:
+                    raise
+            return slc
+
+        start_slice = _get_slice(0, offset=0, search_side='left',
+                                 slice_property='start', search_value=start)
+        end_slice = _get_slice(len(self), offset=1, search_side='right',
+                               slice_property='stop', search_value=end)

         return start_slice, end_slice
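
Not part of the diff: a small sketch of what the refactored helper resolves for each endpoint, assuming a monotonic Float64Index with a duplicated label (the values are made up).

    import pandas as pd

    idx = pd.Index([0.1, 0.2, 0.2, 0.3])  # inferred as a Float64Index

    # get_loc on the duplicated label yields a slice, so _get_slice takes
    # .start for the left endpoint and .stop for the right one
    idx.slice_locs(0.2, 0.2)  # -> (1, 3)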

@@ -1994,11 +1985,12 @@ def _convert_slice_indexer(self, key, typ=None):
         """ convert a slice indexer, by definition these are labels
         unless we are iloc """
         if typ == 'iloc':
-            return super(Float64Index, self)._convert_slice_indexer(key, typ=typ)
+            return super(Float64Index, self)._convert_slice_indexer(key,
+                                                                    typ=typ)

         # allow floats here
-        self._validate_slicer(
-            key, lambda v: v is None or is_integer(v) or is_float(v))
+        validator = lambda v: v is None or is_integer(v) or is_float(v)
+        self._validate_slicer(key, validator)

         # translate to locations
         return self.slice_indexer(key.start, key.stop, key.step)
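
Not part of the diff: a short sketch of the label slicing this validator permits on a Float64Index; float endpoints are accepted and then translated to positions by slice_indexer (the series is made up).

    import pandas as pd

    s = pd.Series([1, 2, 3], index=[0.1, 0.2, 0.3])

    # float endpoints are valid label slicers on a Float64Index;
    # label slices are inclusive of both ends
    s[0.1:0.2]  # rows labelled 0.1 and 0.2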
29 changes: 29 additions & 0 deletions pandas/index.pyx
@@ -401,6 +401,35 @@ cdef class Float64Engine(IndexEngine):
     cdef _get_index_values(self):
         return algos.ensure_float64(self.vgetter())

+    cdef _maybe_get_bool_indexer(self, object val):
Review thread on this line:

Contributor: so this is only hit when the Float64Index is non-monotonic and non-unique?

Member (author): yep

Member (author): and only if you try to slice it.... it works for single element access

Member (author): actually nevermind ... it only raises if you try to pass a slice in
(a short illustration of this follows the pandas/index.pyx diff below)

+        cdef:
+            ndarray[uint8_t] indexer
+            ndarray[float64_t] values
+            int count = 0
+            Py_ssize_t i, n
+            int last_true
+
+        values = self._get_index_values()
+        n = len(values)
+
+        result = np.empty(n, dtype=bool)
+        indexer = result.view(np.uint8)
+
+        for i in range(n):
+            if values[i] == val:
+                count += 1
+                indexer[i] = 1
+                last_true = i
+            else:
+                indexer[i] = 0
+
+        if count == 0:
+            raise KeyError(val)
+        if count == 1:
+            return last_true
+
+        return result

     def _call_monotonic(self, values):
         return algos.is_monotonic_float64(values)

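Not part of the diff: a short illustration of the behavior discussed in the review thread above, assuming a non-unique, non-monotonic Float64Index (the series is made up).

    import numpy as np
    import pandas as pd

    s = pd.Series(np.arange(4), index=[0.1, 0.2, 2.0, 0.2])

    # single-element access works: the engine falls back to a boolean
    # mask, so both rows labelled 0.2 are returned
    s.loc[0.2]

    # slicing raises, since the boolean mask cannot be turned into slice
    # bounds on a non-unique non-monotonic index:
    # KeyError: cannot perform a slice operation on a
    # non-unique non-monotonic index
    s.loc[0.1:0.2]
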
35 changes: 35 additions & 0 deletions pandas/tests/test_frame.py
@@ -1747,6 +1747,41 @@ def test_reversed_reindex_ffill_raises(self):
         self.assertRaises(ValueError, df.reindex, dr[::-1], method='ffill')
         self.assertRaises(ValueError, df.reindex, dr[::-1], method='bfill')

+    def test_getitem_ix_float_duplicates(self):
+        df = pd.DataFrame(np.random.randn(3, 3),
+                          index=[0.1, 0.2, 0.2], columns=list('abc'))
+        expect = df.iloc[1:]
+        tm.assert_frame_equal(df.loc[0.2], expect)
+        tm.assert_frame_equal(df.ix[0.2], expect)
+
+        expect = df.iloc[1:, 0]
+        tm.assert_series_equal(df.loc[0.2, 'a'], expect)
+
+        df.index = [1, 0.2, 0.2]
+        expect = df.iloc[1:]
+        tm.assert_frame_equal(df.loc[0.2], expect)
+        tm.assert_frame_equal(df.ix[0.2], expect)
+
+        expect = df.iloc[1:, 0]
+        tm.assert_series_equal(df.loc[0.2, 'a'], expect)
+
+        df = pd.DataFrame(np.random.randn(4, 3),
+                          index=[1, 0.2, 0.2, 1], columns=list('abc'))
+        expect = df.iloc[1:-1]
+        tm.assert_frame_equal(df.loc[0.2], expect)
+        tm.assert_frame_equal(df.ix[0.2], expect)
+
+        expect = df.iloc[1:-1, 0]
+        tm.assert_series_equal(df.loc[0.2, 'a'], expect)
+
+        df.index = [0.1, 0.2, 2, 0.2]
+        expect = df.iloc[[1, -1]]
+        tm.assert_frame_equal(df.loc[0.2], expect)
+        tm.assert_frame_equal(df.ix[0.2], expect)
+
+        expect = df.iloc[[1, -1], 0]
+        tm.assert_series_equal(df.loc[0.2, 'a'], expect)


_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()