Commit 6d635fc

Merge branch 'duplicated2' of https://github.com/sinhrks/pandas into sinhrks-duplicated2
2 parents cc33c32 + 83fd0b4

6 files changed, +32 -18 lines changed

doc/source/whatsnew/v0.16.0.txt

+2

@@ -27,6 +27,8 @@ Backwards incompatible API changes

 .. _whatsnew_0160.api_breaking:

+- ``Index.duplicated`` now returns `np.array(dtype=bool)` rather than `Index(dtype=object)` containing `bool` values. (:issue:`8875`)
+
 Deprecations
 ~~~~~~~~~~~~

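As an illustration of the entry above (a minimal sketch, not part of the commit; the output comments are approximate and reflect the 0.16.0-era behaviour), the result is now a plain boolean ndarray that can be used directly as a mask:

import pandas as pd

idx = pd.Index(['a', 'b', 'b', 'c', 'a'])

dup = idx.duplicated()
print(dup)                    # [False False  True False  True]
print(type(dup), dup.dtype)   # <class 'numpy.ndarray'> bool

# the boolean array works directly as a mask, no .values / astype step needed
print(idx[~dup])              # Index(['a', 'b', 'c'], dtype='object')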

pandas/core/base.py

+6 -6

@@ -13,7 +13,8 @@


 _shared_docs = dict()
-_indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='')
+_indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='',
+                            duplicated='IndexOpsMixin')


 class StringMixin(object):
@@ -486,14 +487,14 @@ def searchsorted(self, key, side='left'):
     @Appender(_shared_docs['drop_duplicates'] % _indexops_doc_kwargs)
     def drop_duplicates(self, take_last=False, inplace=False):
         duplicated = self.duplicated(take_last=take_last)
-        result = self[~(duplicated.values).astype(bool)]
+        result = self[np.logical_not(duplicated)]
         if inplace:
             return self._update_inplace(result)
         else:
             return result

     _shared_docs['duplicated'] = (
-        """Return boolean %(klass)s denoting duplicate values
+        """Return boolean %(duplicated)s denoting duplicate values

         Parameters
         ----------
@@ -502,7 +503,7 @@ def drop_duplicates(self, take_last=False, inplace=False):

         Returns
         -------
-        duplicated : %(klass)s
+        duplicated : %(duplicated)s
         """)

     @Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs)
@@ -513,8 +514,7 @@ def duplicated(self, take_last=False):
             return self._constructor(duplicated,
                                      index=self.index).__finalize__(self)
         except AttributeError:
-            from pandas.core.index import Index
-            return Index(duplicated)
+            return np.array(duplicated, dtype=bool)

     #----------------------------------------------------------------------
     # abstracts
pandas/core/index.py

+2 -1

@@ -33,7 +33,8 @@

 _unsortable_types = frozenset(('mixed', 'mixed-integer'))

-_index_doc_kwargs = dict(klass='Index', inplace='')
+_index_doc_kwargs = dict(klass='Index', inplace='',
+                         duplicated='np.array')


 def _try_get_item(x):

pandas/core/series.py

+2 -1

@@ -59,7 +59,8 @@
     klass='Series',
     axes_single_arg="{0,'index'}",
     inplace="""inplace : boolean, default False
-        If True, performs operation inplace and returns None."""
+        If True, performs operation inplace and returns None.""",
+    duplicated='Series'
 )


pandas/tests/test_base.py

+12 -6

@@ -614,8 +614,10 @@ def test_duplicated_drop_duplicates(self):
                 continue

             # original doesn't have duplicates
-            expected = Index([False] * len(original))
-            tm.assert_index_equal(original.duplicated(), expected)
+            expected = np.array([False] * len(original), dtype=bool)
+            duplicated = original.duplicated()
+            tm.assert_numpy_array_equal(duplicated, expected)
+            self.assertTrue(duplicated.dtype == bool)
             result = original.drop_duplicates()
             tm.assert_index_equal(result, original)
             self.assertFalse(result is original)
@@ -625,15 +627,19 @@ def test_duplicated_drop_duplicates(self):

             # create repeated values, 3rd and 5th values are duplicated
             idx = original[list(range(len(original))) + [5, 3]]
-            expected = Index([False] * len(original) + [True, True])
-            tm.assert_index_equal(idx.duplicated(), expected)
+            expected = np.array([False] * len(original) + [True, True], dtype=bool)
+            duplicated = idx.duplicated()
+            tm.assert_numpy_array_equal(duplicated, expected)
+            self.assertTrue(duplicated.dtype == bool)
             tm.assert_index_equal(idx.drop_duplicates(), original)

             last_base = [False] * len(idx)
             last_base[3] = True
             last_base[5] = True
-            expected = Index(last_base)
-            tm.assert_index_equal(idx.duplicated(take_last=True), expected)
+            expected = np.array(last_base)
+            duplicated = idx.duplicated(take_last=True)
+            tm.assert_numpy_array_equal(duplicated, expected)
+            self.assertTrue(duplicated.dtype == bool)
             tm.assert_index_equal(idx.drop_duplicates(take_last=True),
                                   idx[~np.array(last_base)])

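The updated assertions above check both the values and the bool dtype of the returned array. For reference, a small sketch of the take_last semantics being tested (example values chosen here, not taken from the suite; take_last was later superseded by the keep keyword):

import pandas as pd

idx = pd.Index([1, 2, 3, 1, 2])

# default: the first occurrence is kept, later repeats are flagged
print(idx.duplicated())                      # [False False False  True  True]

# take_last=True: the last occurrence is kept, earlier repeats are flagged
print(idx.duplicated(take_last=True))        # [ True  True False False False]

# drop_duplicates applies the complementary mask to the index itself
print(idx.drop_duplicates(take_last=True))   # Int64Index([3, 1, 2], dtype='int64')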

pandas/tests/test_multilevel.py

+8 -4

@@ -2075,13 +2075,17 @@ def test_duplicated_drop_duplicates(self):
         # GH 4060
         idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2 ,3], [1, 1, 1, 1, 2, 2]))

-        expected = Index([False, False, False, True, False, False])
-        tm.assert_index_equal(idx.duplicated(), expected)
+        expected = np.array([False, False, False, True, False, False], dtype=bool)
+        duplicated = idx.duplicated()
+        tm.assert_numpy_array_equal(duplicated, expected)
+        self.assertTrue(duplicated.dtype == bool)
         expected = MultiIndex.from_arrays(([1, 2, 3, 2 ,3], [1, 1, 1, 2, 2]))
         tm.assert_index_equal(idx.drop_duplicates(), expected)

-        expected = Index([True, False, False, False, False, False])
-        tm.assert_index_equal(idx.duplicated(take_last=True), expected)
+        expected = np.array([True, False, False, False, False, False])
+        duplicated = idx.duplicated(take_last=True)
+        tm.assert_numpy_array_equal(duplicated, expected)
+        self.assertTrue(duplicated.dtype == bool)
         expected = MultiIndex.from_arrays(([2, 3, 1, 2 ,3], [1, 1, 1, 2, 2]))
         tm.assert_index_equal(idx.drop_duplicates(take_last=True), expected)


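Restated as a standalone snippet (same values as the test above; the repr comments are approximate), the MultiIndex case shows that duplicates are decided on whole label tuples:

from pandas import MultiIndex

idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2]))
# tuples: (1, 1), (2, 1), (3, 1), (1, 1), (2, 2), (3, 2) -> only (1, 1) repeats

print(idx.duplicated())
# [False False False  True False False]

print(idx.duplicated(take_last=True))
# [ True False False False False False]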