Skip to content

Commit 60a6e9f

Browse files
committed
BUG: drop_duplicates drops name(s).
1 parent 2fea54a commit 60a6e9f

File tree

4 files changed

+68
-5
lines changed

4 files changed

+68
-5
lines changed

doc/source/whatsnew/v0.17.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ Bug Fixes
6363
~~~~~~~~~
6464
- Bug in ``DataFrame.apply`` when function returns categorical series. (:issue:`9573`)
6565

66-
66+
- Bug in ``Index.drop_duplicates`` dropping name(s) (:issue:`10115`)
6767
- Bug in ``pd.Series`` when setting a value on an empty ``Series`` whose index has a frequency. (:issue:`10193`)
6868
- Bug in ``DataFrame.reset_index`` when index contains ``NaT``. (:issue:`10388`)
6969
- Bug in ``ExcelReader`` when worksheet is empty (:issue:`6403`)

pandas/core/index.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -2573,14 +2573,12 @@ def drop(self, labels, errors='raise'):
25732573

25742574
@Appender(_shared_docs['drop_duplicates'] % _index_doc_kwargs)
25752575
def drop_duplicates(self, take_last=False):
2576-
result = super(Index, self).drop_duplicates(take_last=take_last)
2577-
return self._constructor(result)
2576+
return super(Index, self).drop_duplicates(take_last=take_last)
25782577

25792578
@Appender(_shared_docs['duplicated'] % _index_doc_kwargs)
25802579
def duplicated(self, take_last=False):
25812580
return super(Index, self).duplicated(take_last=take_last)
25822581

2583-
25842582
def _evaluate_with_timedelta_like(self, other, op, opstr):
25852583
raise TypeError("can only perform ops with timedelta like values")
25862584

pandas/tests/test_index.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,19 @@ def test_duplicates(self):
207207

208208
if not len(ind):
209209
continue
210+
if isinstance(ind, MultiIndex):
211+
continue
210212
idx = self._holder([ind[0]]*5)
211213
self.assertFalse(idx.is_unique)
212214
self.assertTrue(idx.has_duplicates)
213215

216+
# GH 10115
217+
# preserve names
218+
idx.name = 'foo'
219+
result = idx.drop_duplicates()
220+
self.assertEqual(result.name, 'foo')
221+
self.assert_index_equal(result, Index([ind[0]],name='foo'))
222+
214223
def test_sort(self):
215224
for ind in self.indices.values():
216225
self.assertRaises(TypeError, ind.sort)
@@ -1830,10 +1839,13 @@ def test_reindexing(self):
18301839

18311840
def test_duplicates(self):
18321841

1833-
idx = CategoricalIndex([0, 0, 0])
1842+
idx = CategoricalIndex([0, 0, 0], name='foo')
18341843
self.assertFalse(idx.is_unique)
18351844
self.assertTrue(idx.has_duplicates)
18361845

1846+
expected = CategoricalIndex([0], name='foo')
1847+
self.assert_index_equal(idx.drop_duplicates(), expected)
1848+
18371849
def test_get_indexer(self):
18381850

18391851
idx1 = CategoricalIndex(list('aabcde'),categories=list('edabc'))
@@ -4603,6 +4615,19 @@ def check(nlevels, with_nulls):
46034615
self.assert_array_equal(mi.duplicated(),
46044616
np.zeros(len(mi), dtype='bool'))
46054617

4618+
def test_duplicate_meta_data(self):
4619+
# GH 10115
4620+
index = MultiIndex(levels=[[0, 1], [0, 1, 2]],
4621+
labels=[[0, 0, 0, 0, 1, 1, 1],
4622+
[0, 1, 2, 0, 0, 1, 2]])
4623+
for idx in [index,
4624+
index.set_names([None, None]),
4625+
index.set_names([None, 'Num']),
4626+
index.set_names(['Upper','Num']),
4627+
]:
4628+
self.assertTrue(idx.has_duplicates)
4629+
self.assertEqual(idx.drop_duplicates().names, idx.names)
4630+
46064631
def test_tolist(self):
46074632
result = self.index.tolist()
46084633
exp = list(self.index.values)

pandas/tseries/tests/test_base.py

+40
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,20 @@ def test_getitem(self):
330330
self.assert_index_equal(result, expected)
331331
self.assertEqual(result.freq, expected.freq)
332332

333+
def test_drop_duplicates_metadata(self):
334+
# GH 10115
335+
idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
336+
result = idx.drop_duplicates()
337+
self.assert_index_equal(idx, result)
338+
self.assertEqual(idx.freq, result.freq)
339+
340+
idx_dup = idx.append(idx)
341+
self.assertIsNone(idx_dup.freq) # freq is reset
342+
result = idx_dup.drop_duplicates()
343+
self.assert_index_equal(idx, result)
344+
self.assertIsNone(result.freq)
345+
346+
333347
class TestTimedeltaIndexOps(Ops):
334348

335349
def setUp(self):
@@ -802,6 +816,20 @@ def test_getitem(self):
802816
self.assert_index_equal(result, expected)
803817
self.assertEqual(result.freq, expected.freq)
804818

819+
def test_drop_duplicates_metadata(self):
820+
# GH 10115
821+
idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx')
822+
result = idx.drop_duplicates()
823+
self.assert_index_equal(idx, result)
824+
self.assertEqual(idx.freq, result.freq)
825+
826+
idx_dup = idx.append(idx)
827+
self.assertIsNone(idx_dup.freq) # freq is reset
828+
result = idx_dup.drop_duplicates()
829+
self.assert_index_equal(idx, result)
830+
self.assertIsNone(result.freq)
831+
832+
805833
class TestPeriodIndexOps(Ops):
806834

807835
def setUp(self):
@@ -1228,6 +1256,18 @@ def test_value_counts_unique(self):
12281256

12291257
tm.assert_index_equal(idx.unique(), exp_idx)
12301258

1259+
def test_drop_duplicates_metadata(self):
1260+
# GH 10115
1261+
idx = pd.period_range('2011-01-01', '2011-01-31', freq='D', name='idx')
1262+
result = idx.drop_duplicates()
1263+
self.assert_index_equal(idx, result)
1264+
self.assertEqual(idx.freq, result.freq)
1265+
1266+
idx_dup = idx.append(idx) # freq will not be reset
1267+
result = idx_dup.drop_duplicates()
1268+
self.assert_index_equal(idx, result)
1269+
self.assertEqual(idx.freq, result.freq)
1270+
12311271

12321272
if __name__ == '__main__':
12331273
import nose

0 commit comments

Comments
 (0)