Skip to content

Commit 749a1e0

Browse files
h-vetinari authored and victor committed
TST/CLN: correctly skip in indexes/common; add test for duplicated (pandas-dev#21902)
1 parent 1dcac5c commit 749a1e0

File tree

3 files changed: +48 -18 lines changed

pandas/tests/indexes/common.py

+43-16
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,6 @@ def verify_pickle(self, indices):
3535
assert indices.equals(unpickled)
3636

3737
def test_pickle_compat_construction(self):
38-
# this is testing for pickle compat
39-
if self._holder is None:
40-
return
41-
4238
# need an object to create with
4339
pytest.raises(TypeError, self._holder)
4440

@@ -236,7 +232,7 @@ def test_set_name_methods(self, indices):
236232

237233
# don't tests a MultiIndex here (as its tested separated)
238234
if isinstance(indices, MultiIndex):
239-
return
235+
pytest.skip('Skip check for MultiIndex')
240236
original_name = indices.name
241237
new_ind = indices.set_names([new_name])
242238
assert new_ind.name == new_name
@@ -333,7 +329,8 @@ def test_copy_and_deepcopy(self, indices):
333329
from copy import copy, deepcopy
334330

335331
if isinstance(indices, MultiIndex):
336-
return
332+
pytest.skip('Skip check for MultiIndex')
333+
337334
for func in (copy, deepcopy):
338335
idx_copy = func(indices)
339336
assert idx_copy is not indices
@@ -342,20 +339,50 @@ def test_copy_and_deepcopy(self, indices):
342339
new_copy = indices.copy(deep=True, name="banana")
343340
assert new_copy.name == "banana"
344341

345-
def test_duplicates(self, indices):
342+
def test_has_duplicates(self, indices):
346343
if type(indices) is not self._holder:
347-
return
344+
pytest.skip('Can only check if we have the correct type')
348345
if not len(indices) or isinstance(indices, MultiIndex):
349-
return
346+
# MultiIndex tested separately in:
347+
# tests/indexes/multi/test_unique_and_duplicates
348+
pytest.skip('Skip check for empty Index and MultiIndex')
349+
350350
idx = self._holder([indices[0]] * 5)
351351
assert not idx.is_unique
352352
assert idx.has_duplicates
353353

354+
@pytest.mark.parametrize('keep', ['first', 'last', False])
355+
def test_duplicated(self, indices, keep):
356+
if type(indices) is not self._holder:
357+
pytest.skip('Can only check if we know the index type')
358+
if not len(indices) or isinstance(indices, MultiIndex):
359+
# MultiIndex tested separately in:
360+
# tests/indexes/multi/test_unique_and_duplicates
361+
pytest.skip('Skip check for empty Index and MultiIndex')
362+
363+
idx = self._holder(indices)
364+
if idx.has_duplicates:
365+
# We are testing the duplicated-method here, so we need to know
366+
# exactly which indices are duplicate and how (for the result).
367+
# This is not possible if "idx" has duplicates already, which we
368+
# therefore remove. This is seemingly circular, as drop_duplicates
369+
# invokes duplicated, but in the end, it all works out because we
370+
# cross-check with Series.duplicated, which is tested separately.
371+
idx = idx.drop_duplicates()
372+
373+
n, k = len(idx), 10
374+
duplicated_selection = np.random.choice(n, k * n)
375+
expected = pd.Series(duplicated_selection).duplicated(keep=keep).values
376+
idx = self._holder(idx.values[duplicated_selection])
377+
378+
result = idx.duplicated(keep=keep)
379+
tm.assert_numpy_array_equal(result, expected)
380+
354381
def test_unique(self, indices):
355382
# don't test a MultiIndex here (as its tested separated)
356383
# don't test a CategoricalIndex because categories change (GH 18291)
357384
if isinstance(indices, (MultiIndex, CategoricalIndex)):
358-
return
385+
pytest.skip('Skip check for MultiIndex/CategoricalIndex')
359386

360387
# GH 17896
361388
expected = indices.drop_duplicates()
@@ -375,7 +402,7 @@ def test_unique_na(self):
375402
def test_get_unique_index(self, indices):
376403
# MultiIndex tested separately
377404
if not len(indices) or isinstance(indices, MultiIndex):
378-
return
405+
pytest.skip('Skip check for empty Index and MultiIndex')
379406

380407
idx = indices[[0] * 5]
381408
idx_unique = indices[[0]]
@@ -394,7 +421,7 @@ def test_get_unique_index(self, indices):
394421

395422
# nans:
396423
if not indices._can_hold_na:
397-
return
424+
pytest.skip('Skip na-check if index cannot hold na')
398425

399426
if needs_i8_conversion(indices):
400427
vals = indices.asi8[[0] * 5]
@@ -423,7 +450,7 @@ def test_sort(self, indices):
423450

424451
def test_mutability(self, indices):
425452
if not len(indices):
426-
return
453+
pytest.skip('Skip check for empty Index')
427454
pytest.raises(TypeError, indices.__setitem__, 0, indices[0])
428455

429456
def test_view(self, indices):
@@ -761,7 +788,7 @@ def test_equals_op(self):
761788
# GH9947, GH10637
762789
index_a = self.create_index()
763790
if isinstance(index_a, PeriodIndex):
764-
return
791+
pytest.skip('Skip check for PeriodIndex')
765792

766793
n = len(index_a)
767794
index_b = index_a[0:-1]
@@ -989,11 +1016,11 @@ def test_searchsorted_monotonic(self, indices):
9891016
# not implemented for tuple searches in MultiIndex
9901017
# or Intervals searches in IntervalIndex
9911018
if isinstance(indices, (MultiIndex, IntervalIndex)):
992-
return
1019+
pytest.skip('Skip check for MultiIndex/IntervalIndex')
9931020

9941021
# nothing to test if the index is empty
9951022
if indices.empty:
996-
return
1023+
pytest.skip('Skip check for empty Index')
9971024
value = indices[0]
9981025

9991026
# determine the expected results (handle dupes for 'right')

pandas/tests/indexes/test_category.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -590,12 +590,15 @@ def test_is_unique(self, values, expected):
590590
ci = CategoricalIndex(values)
591591
assert ci.is_unique is expected
592592

593-
def test_duplicates(self):
593+
def test_has_duplicates(self):
594594

595595
idx = CategoricalIndex([0, 0, 0], name='foo')
596596
assert not idx.is_unique
597597
assert idx.has_duplicates
598598

599+
def test_drop_duplicates(self):
600+
601+
idx = CategoricalIndex([0, 0, 0], name='foo')
599602
expected = CategoricalIndex([0], name='foo')
600603
tm.assert_index_equal(idx.drop_duplicates(), expected)
601604
tm.assert_index_equal(idx.unique(), expected)

pandas/tests/indexes/test_range.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -806,7 +806,7 @@ def test_explicit_conversions(self):
806806
result = a - fidx
807807
tm.assert_index_equal(result, expected)
808808

809-
def test_duplicates(self):
809+
def test_has_duplicates(self):
810810
for ind in self.indices:
811811
if not len(ind):
812812
continue

0 commit comments

Comments
 (0)