Skip to content

Commit 6cc875c

Browse files
committed
Merge pull request #8523 from jreback/regr
BUG/REGR: bool-like Indexes not properly coercing to object (GH8522)
2 parents 89e6092 + 574facc commit 6cc875c

File tree

7 files changed

+73
-15
lines changed

7 files changed

+73
-15
lines changed

doc/source/v0.15.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,7 @@ Internal Refactoring
642642

643643
In 0.15.0 ``Index`` has internally been refactored to no longer subclass ``ndarray``
644644
but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This change allows very easy sub-classing and creation of new index types. This should be
645-
a transparent change with only very limited API implications (:issue:`5080`, :issue:`7439`, :issue:`7796`, :issue:`8024`, :issue:`8367`, :issue:`7997`)
645+
a transparent change with only very limited API implications (:issue:`5080`, :issue:`7439`, :issue:`7796`, :issue:`8024`, :issue:`8367`, :issue:`7997`, :issue:`8522`)
646646

647647
- you may need to unpickle pickles created with pandas versions < 0.15.0 using ``pd.read_pickle`` rather than ``pickle.load``. See :ref:`pickle docs <io.pickle>`
648648
- when plotting with a ``PeriodIndex``, the ``matplotlib`` internal axes will now be arrays of ``Period`` rather than a ``PeriodIndex`` (this is similar to how a ``DatetimeIndex`` now passes arrays of ``datetimes``)

pandas/core/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ def searchsorted(self, key, side='left'):
499499
@Appender(_shared_docs['drop_duplicates'] % _indexops_doc_kwargs)
500500
def drop_duplicates(self, take_last=False, inplace=False):
501501
duplicated = self.duplicated(take_last=take_last)
502-
result = self[~duplicated.values]
502+
result = self[~(duplicated.values).astype(bool)]
503503
if inplace:
504504
return self._update_inplace(result)
505505
else:

pandas/core/index.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -148,16 +148,16 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
148148
data = np.array(data, dtype=dtype, copy=copy)
149149
except TypeError:
150150
pass
151-
elif isinstance(data, PeriodIndex):
152-
return PeriodIndex(data, copy=copy, name=name, **kwargs)
153151

152+
# maybe coerce to a sub-class
153+
if isinstance(data, PeriodIndex):
154+
return PeriodIndex(data, copy=copy, name=name, **kwargs)
154155
if issubclass(data.dtype.type, np.integer):
155156
return Int64Index(data, copy=copy, dtype=dtype, name=name)
156-
if issubclass(data.dtype.type, np.floating):
157+
elif issubclass(data.dtype.type, np.floating):
157158
return Float64Index(data, copy=copy, dtype=dtype, name=name)
158-
159-
if com.is_bool_dtype(data):
160-
subarr = data
159+
elif issubclass(data.dtype.type, np.bool) or com.is_bool_dtype(data):
160+
subarr = data.astype('object')
161161
else:
162162
subarr = com._asarray_tuplesafe(data, dtype=object)
163163

@@ -583,6 +583,9 @@ def is_unique(self):
583583
""" return if the index has unique values """
584584
return self._engine.is_unique
585585

586+
def is_boolean(self):
587+
return self.inferred_type in ['boolean']
588+
586589
def is_integer(self):
587590
return self.inferred_type in ['integer']
588591

@@ -592,6 +595,9 @@ def is_floating(self):
592595
def is_numeric(self):
593596
return self.inferred_type in ['integer', 'floating']
594597

598+
def is_object(self):
599+
return self.dtype == np.object_
600+
595601
def is_mixed(self):
596602
return 'mixed' in self.inferred_type
597603

pandas/tests/test_base.py

+36-6
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ def f():
180180

181181
class Ops(tm.TestCase):
182182
def setUp(self):
183+
self.bool_index = tm.makeBoolIndex(10)
183184
self.int_index = tm.makeIntIndex(10)
184185
self.float_index = tm.makeFloatIndex(10)
185186
self.dt_index = tm.makeDateIndex(10)
@@ -189,14 +190,15 @@ def setUp(self):
189190

190191
arr = np.random.randn(10)
191192
self.int_series = Series(arr, index=self.int_index)
192-
self.float_series = Series(arr, index=self.int_index)
193+
self.float_series = Series(arr, index=self.float_index)
193194
self.dt_series = Series(arr, index=self.dt_index)
194195
self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True)
195196
self.period_series = Series(arr, index=self.period_index)
196197
self.string_series = Series(arr, index=self.string_index)
197198

198-
types = ['int','float','dt', 'dt_tz', 'period','string']
199-
self.objs = [ getattr(self,"{0}_{1}".format(t,f)) for t in types for f in ['index','series'] ]
199+
types = ['bool','int','float','dt', 'dt_tz', 'period','string']
200+
fmts = [ "{0}_{1}".format(t,f) for t in types for f in ['index','series'] ]
201+
self.objs = [ getattr(self,f) for f in fmts if getattr(self,f,None) is not None ]
200202

201203
def check_ops_properties(self, props, filter=None, ignore_failures=False):
202204
for op in props:
@@ -340,6 +342,9 @@ def test_value_counts_unique_nunique(self):
340342
# freq must be specified because repeat makes freq ambiguous
341343
expected_index = o[::-1]
342344
o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq)
345+
# don't test boolean
346+
elif isinstance(o,Index) and o.is_boolean():
347+
continue
343348
elif isinstance(o, Index):
344349
expected_index = values[::-1]
345350
o = klass(np.repeat(values, range(1, len(o) + 1)))
@@ -366,6 +371,10 @@ def test_value_counts_unique_nunique(self):
366371
klass = type(o)
367372
values = o.values
368373

374+
if isinstance(o,Index) and o.is_boolean():
375+
# don't test boolean
376+
continue
377+
369378
if ((isinstance(o, Int64Index) and not isinstance(o,
370379
(DatetimeIndex, PeriodIndex)))):
371380
# skips int64 because it doesn't allow to include nan or None
@@ -537,24 +546,37 @@ def test_value_counts_inferred(self):
537546

538547
def test_factorize(self):
539548
for o in self.objs:
540-
exp_arr = np.array(range(len(o)))
549+
550+
if isinstance(o,Index) and o.is_boolean():
551+
exp_arr = np.array([0,1] + [0] * 8)
552+
exp_uniques = o
553+
exp_uniques = Index([False,True])
554+
else:
555+
exp_arr = np.array(range(len(o)))
556+
exp_uniques = o
541557
labels, uniques = o.factorize()
542558

543559
self.assert_numpy_array_equal(labels, exp_arr)
544560
if isinstance(o, Series):
545561
expected = Index(o.values)
546562
self.assert_numpy_array_equal(uniques, expected)
547563
else:
548-
self.assertTrue(uniques.equals(o))
564+
self.assertTrue(uniques.equals(exp_uniques))
549565

550566
for o in self.objs:
567+
568+
# don't test boolean
569+
if isinstance(o,Index) and o.is_boolean():
570+
continue
571+
551572
# sort by value, and create duplicates
552573
if isinstance(o, Series):
553574
o.sort()
575+
n = o.iloc[5:].append(o)
554576
else:
555577
indexer = o.argsort()
556578
o = o.take(indexer)
557-
n = o[5:].append(o)
579+
n = o[5:].append(o)
558580

559581
exp_arr = np.array([5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
560582
labels, uniques = n.factorize(sort=True)
@@ -582,6 +604,14 @@ def test_duplicated_drop_duplicates(self):
582604
for original in self.objs:
583605

584606
if isinstance(original, Index):
607+
608+
# special case
609+
if original.is_boolean():
610+
result = original.drop_duplicates()
611+
expected = Index([False,True])
612+
tm.assert_index_equal(result, expected)
613+
continue
614+
585615
# original doesn't have duplicates
586616
expected = Index([False] * len(original))
587617
tm.assert_index_equal(original.duplicated(), expected)

pandas/tests/test_index.py

+8
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ def setUp(self):
9494
dateIndex = tm.makeDateIndex(100),
9595
intIndex = tm.makeIntIndex(100),
9696
floatIndex = tm.makeFloatIndex(100),
97+
boolIndex = Index([True,False]),
9798
empty = Index([]),
9899
tuples = MultiIndex.from_tuples(lzip(['foo', 'bar', 'baz'],
99100
[1, 2, 3]))
@@ -732,6 +733,13 @@ def test_is_numeric(self):
732733
self.assertTrue(self.intIndex.is_numeric())
733734
self.assertTrue(self.floatIndex.is_numeric())
734735

736+
def test_is_object(self):
737+
self.assertTrue(self.strIndex.is_object())
738+
self.assertTrue(self.boolIndex.is_object())
739+
self.assertFalse(self.intIndex.is_object())
740+
self.assertFalse(self.dateIndex.is_object())
741+
self.assertFalse(self.floatIndex.is_object())
742+
735743
def test_is_all_dates(self):
736744
self.assertTrue(self.dateIndex.is_all_dates)
737745
self.assertFalse(self.strIndex.is_all_dates)

pandas/tests/test_series.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -1222,7 +1222,7 @@ def test_getitem_dups(self):
12221222
expected = Series([3,4],index=['C','C'],dtype=np.int64)
12231223
result = s['C']
12241224
assert_series_equal(result, expected)
1225-
1225+
12261226
def test_getitem_dataframe(self):
12271227
rng = list(range(10))
12281228
s = pd.Series(10, index=rng)
@@ -1817,6 +1817,13 @@ def test_drop(self):
18171817
# bad axis
18181818
self.assertRaises(ValueError, s.drop, 'one', axis='columns')
18191819

1820+
# GH 8522
1821+
s = Series([2,3], index=[True, False])
1822+
self.assertTrue(s.index.is_object())
1823+
result = s.drop(True)
1824+
expected = Series([3],index=[False])
1825+
assert_series_equal(result,expected)
1826+
18201827
def test_ix_setitem(self):
18211828
inds = self.series.index[[3, 4, 7]]
18221829

pandas/util/testing.py

+7
Original file line numberDiff line numberDiff line change
@@ -738,6 +738,13 @@ def makeStringIndex(k=10):
738738
def makeUnicodeIndex(k=10):
739739
return Index([randu(10) for _ in range(k)])
740740

741+
def makeBoolIndex(k=10):
742+
if k == 1:
743+
return Index([True])
744+
elif k == 2:
745+
return Index([False,True])
746+
return Index([False,True] + [False]*(k-2))
747+
741748
def makeIntIndex(k=10):
742749
return Index(lrange(k))
743750

0 commit comments

Comments
 (0)