Skip to content

Commit 10a5171

Browse files
committed
BUG: disable position-based slicing altogether if a dtype=object index contains any integer objects; augment infer_dtype to report 'mixed-integer' (GH #798)
1 parent d716b74 commit 10a5171

File tree

4 files changed

+33
-20
lines changed

4 files changed

+33
-20
lines changed

pandas/core/indexing.py

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -243,23 +243,25 @@ def _convert_to_indexer(self, obj, axis=0):
243243
and not labels.inferred_type == 'integer'
244244
and not isinstance(labels, MultiIndex))
245245

246+
start, stop = obj.start, obj.stop
246247

247248
# last ditch effort: if we are mixed and have integers
248249
try:
249-
if labels.inferred_type == 'mixed' and int_slice:
250-
if obj.start is not None:
251-
i = labels.get_loc(obj.start)
252-
if obj.stop is not None:
253-
j = labels.get_loc(obj.stop)
250+
if 'mixed' in labels.inferred_type and int_slice:
251+
if start is not None:
252+
i = labels.get_loc(start)
253+
if stop is not None:
254+
j = labels.get_loc(stop)
254255
position_slice = False
255256
except KeyError:
256-
pass
257+
if labels.inferred_type == 'mixed-integer':
258+
raise
257259

258260
if null_slice or position_slice:
259261
slicer = obj
260262
else:
261263
try:
262-
i, j = labels.slice_locs(obj.start, obj.stop)
264+
i, j = labels.slice_locs(start, stop)
263265
slicer = slice(i, j, obj.step)
264266
except Exception:
265267
if _is_index_slice(obj):
@@ -316,20 +318,22 @@ def _get_slice_axis(self, slice_obj, axis=0):
316318

317319
# could have integers in the first level of the MultiIndex, in which
318320
# case we wouldn't want to do position-based slicing
319-
position_slice = (int_slice and not labels.inferred_type == 'integer'
321+
position_slice = (int_slice
322+
and labels.inferred_type != 'integer'
320323
and not isinstance(labels, MultiIndex)
321324
and not float_slice)
322325

323326
# last ditch effort: if we are mixed and have integers
324327
try:
325-
if labels.inferred_type == 'mixed' and int_slice:
328+
if 'mixed' in labels.inferred_type and int_slice:
326329
if start is not None:
327330
i = labels.get_loc(start)
328331
if stop is not None:
329332
j = labels.get_loc(stop)
330333
position_slice = False
331334
except KeyError:
332-
pass
335+
if labels.inferred_type == 'mixed-integer':
336+
raise
333337

334338
if null_slice or position_slice:
335339
slicer = slice_obj

pandas/src/inference.pyx

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ except AttributeError:
2929
def infer_dtype(object _values):
3030
cdef:
3131
Py_ssize_t i, n
32-
object test_val
32+
object val
3333
ndarray values
3434

3535
if isinstance(_values, np.ndarray):
@@ -50,29 +50,34 @@ def infer_dtype(object _values):
5050
if values.dtype != np.object_:
5151
values = values.astype('O')
5252

53-
test_val = util.get_value_1d(values, 0)
53+
val = util.get_value_1d(values, 0)
5454

55-
if util.is_integer_object(test_val):
55+
if util.is_integer_object(val):
5656
if is_integer_array(values):
5757
return 'integer'
58-
59-
elif is_datetime(test_val):
58+
return 'mixed-integer'
59+
elif is_datetime(val):
6060
if is_datetime_array(values):
6161
return 'datetime'
6262

63-
elif util.is_float_object(test_val):
63+
elif util.is_float_object(val):
6464
if is_float_array(values):
6565

6666
return 'floating'
6767

68-
elif util.is_bool_object(test_val):
68+
elif util.is_bool_object(val):
6969
if is_bool_array(values):
7070
return 'boolean'
7171

72-
elif util.is_string_object(test_val):
72+
elif util.is_string_object(val):
7373
if is_string_array(values):
7474
return 'string'
7575

76+
for i in range(n):
77+
val = util.get_value_1d(values, i)
78+
if util.is_integer_object(val):
79+
return 'mixed-integer'
80+
7681
return 'mixed'
7782

7883
def infer_dtype_list(list values):

pandas/tests/test_frame.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -811,6 +811,10 @@ def test_getitem_setitem_float_labels(self):
811811
assert_frame_equal(result, expected)
812812
self.assertEqual(len(result), 2)
813813

814+
# this should raise an exception
815+
self.assertRaises(Exception, df.ix.__getitem__, slice(1, 2))
816+
self.assertRaises(Exception, df.ix.__setitem__, slice(1, 2), 0)
817+
814818
def test_setitem_single_column_mixed(self):
815819
df = DataFrame(randn(5, 3), index=['a', 'b', 'c', 'd', 'e'],
816820
columns=['foo', 'bar', 'baz'])

pandas/tests/test_tseries.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ def test_integers(self):
299299
arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'],
300300
dtype='O')
301301
result = lib.infer_dtype(arr)
302-
self.assertEqual(result, 'mixed')
302+
self.assertEqual(result, 'mixed-integer')
303303

304304
arr = np.array([1, 2, 3, 4, 5], dtype='i4')
305305
result = lib.infer_dtype(arr)
@@ -330,7 +330,7 @@ def test_floats(self):
330330
arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'],
331331
dtype='O')
332332
result = lib.infer_dtype(arr)
333-
self.assertEqual(result, 'mixed')
333+
self.assertEqual(result, 'mixed-integer')
334334

335335
arr = np.array([1, 2, 3, 4, 5], dtype='f4')
336336
result = lib.infer_dtype(arr)

0 commit comments

Comments
 (0)