Skip to content

Commit dfa7c1b

Browse files
committed
DOC, ENH, TST: Add more tests for duplicated()
Updates the documentation to describe the "values" parameter in the signature. Adds functionality and tests for the following: 1) accepting array-likes with no dtype attribute; 2) properly checking for duplicates in np.uint64 arrays. The algorithm currently converts uint64 to int64, but that is not an issue: every distinct value in the uint64 range maps to a distinct int64 value, so duplicates are detected exactly as they would be before the conversion. Hence, the returned result is ultimately correct.
1 parent 3ab369c commit dfa7c1b

File tree

2 files changed

+16
-2
lines changed

2 files changed

+16
-2
lines changed

pandas/core/algorithms.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -490,12 +490,14 @@ def _value_counts_arraylike(values, dropna=True):
490490

491491
def duplicated(values, keep='first'):
492492
"""
493-
Return boolean ndarray denoting duplicate values
493+
Return boolean ndarray denoting duplicate values.
494494
495495
.. versionadded:: 0.19.0
496496
497497
Parameters
498498
----------
499+
values : array-like
500+
Array over which to check for duplicate values.
499501
keep : {'first', 'last', False}, default 'first'
500502
- ``first`` : Mark duplicates as ``True`` except for the first
501503
occurrence.
@@ -508,6 +510,7 @@ def duplicated(values, keep='first'):
508510
duplicated : ndarray
509511
"""
510512

513+
values = com._asarray_tuplesafe(values)
511514
dtype = values.dtype
512515

513516
# no need to revert to original type
@@ -522,6 +525,8 @@ def duplicated(values, keep='first'):
522525
values = values.values
523526

524527
if is_integer_dtype(dtype):
528+
# This also works if dtype is uint64 because there is a 1-1
529+
# correspondence between int64 and uint64.
525530
values = _ensure_int64(values)
526531
duplicated = htable.duplicated_int64(values, keep=keep)
527532
elif is_float_dtype(dtype):

pandas/tests/test_algos.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,9 @@ def test_numeric_object_likes(self):
672672
np.array([1 + 1j, 2 + 2j, 1 + 1j, 5 + 5j, 3 + 3j,
673673
2 + 2j, 4 + 4j, 1 + 1j, 5 + 5j, 6 + 6j]),
674674
np.array(['a', 'b', 'a', 'e', 'c',
675-
'b', 'd', 'a', 'e', 'f'], dtype=object)]
675+
'b', 'd', 'a', 'e', 'f'], dtype=object),
676+
np.array([1, 2**63, 1, 3**5, 10,
677+
2**63, 39, 1, 3**5, 7], dtype=np.uint64)]
676678

677679
exp_first = np.array([False, False, True, False, False,
678680
True, False, True, True, False])
@@ -772,6 +774,13 @@ def test_unique_index(self):
772774
tm.assert_numpy_array_equal(case.duplicated(),
773775
np.array([False, False, False]))
774776

777+
def test_duplicated_non_dtype(self):
778+
# Make sure we can pass in array-likes with no dtype attribute.
779+
cases = [[1, 2, 3], tuple([1, 2, 3])]
780+
for case in cases:
781+
tm.assert_numpy_array_equal(algos.duplicated(case),
782+
np.array([False, False, False]))
783+
775784

776785
class GroupVarTestMixin(object):
777786

0 commit comments

Comments
 (0)