Commit 30197b5

Merge pull request #10367 from sinhrks/drop_duplicates_names
BUG: drop_duplicates drops name(s).
2 parents: 4220d47 + 60a6e9f

4 files changed: +68 -5 lines

doc/source/whatsnew/v0.17.0.txt (+1 -1)
@@ -70,7 +70,7 @@ Bug Fixes
 ~~~~~~~~~
 - Bug in ``DataFrame.apply`` when function returns categorical series. (:issue:`9573`)
 
-
+- Bug in ``Index.drop_duplicates`` dropping name(s) (:issue:`10115`)
 - Bug in ``pd.Series`` when setting a value on an empty ``Series`` whose index has a frequency. (:issue:`10193`)
 - Bug in ``DataFrame.reset_index`` when index contains `NaT`. (:issue:`10388`)
 - Bug in ``ExcelReader`` when worksheet is empty (:issue:`6403`)

pandas/core/index.py (+1 -3)
@@ -2573,14 +2573,12 @@ def drop(self, labels, errors='raise'):
 
     @Appender(_shared_docs['drop_duplicates'] % _index_doc_kwargs)
     def drop_duplicates(self, take_last=False):
-        result = super(Index, self).drop_duplicates(take_last=take_last)
-        return self._constructor(result)
+        return super(Index, self).drop_duplicates(take_last=take_last)
 
     @Appender(_shared_docs['duplicated'] % _index_doc_kwargs)
     def duplicated(self, take_last=False):
         return super(Index, self).duplicated(take_last=take_last)
 
-
     def _evaluate_with_timedelta_like(self, other, op, opstr):
         raise TypeError("can only perform ops with timedelta like values")
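For context, here is a minimal sketch of the behavior this change restores (the index values and the 'foo' name below are illustrative, not part of the commit): re-wrapping the superclass result in self._constructor(result) dropped the index name (GH 10115), while returning the superclass result directly keeps it.

    import pandas as pd

    # With this fix, Index.drop_duplicates keeps the index name (GH 10115);
    # previously the result came back with name=None.
    idx = pd.Index(['a', 'a', 'b', 'c'], name='foo')
    result = idx.drop_duplicates()

    assert list(result) == ['a', 'b', 'c']
    assert result.name == 'foo'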

pandas/tests/test_index.py (+26 -1)
@@ -207,10 +207,19 @@ def test_duplicates(self):
 
             if not len(ind):
                 continue
+            if isinstance(ind, MultiIndex):
+                continue
             idx = self._holder([ind[0]]*5)
             self.assertFalse(idx.is_unique)
             self.assertTrue(idx.has_duplicates)
 
+            # GH 10115
+            # preserve names
+            idx.name = 'foo'
+            result = idx.drop_duplicates()
+            self.assertEqual(result.name, 'foo')
+            self.assert_index_equal(result, Index([ind[0]],name='foo'))
+
     def test_sort(self):
         for ind in self.indices.values():
             self.assertRaises(TypeError, ind.sort)
@@ -1830,10 +1839,13 @@ def test_reindexing(self):
 
     def test_duplicates(self):
 
-        idx = CategoricalIndex([0, 0, 0])
+        idx = CategoricalIndex([0, 0, 0], name='foo')
        self.assertFalse(idx.is_unique)
        self.assertTrue(idx.has_duplicates)
 
+        expected = CategoricalIndex([0], name='foo')
+        self.assert_index_equal(idx.drop_duplicates(), expected)
+
     def test_get_indexer(self):
 
         idx1 = CategoricalIndex(list('aabcde'),categories=list('edabc'))
@@ -4595,6 +4607,19 @@ def check(nlevels, with_nulls):
         self.assert_array_equal(mi.duplicated(),
                                 np.zeros(len(mi), dtype='bool'))
 
+    def test_duplicate_meta_data(self):
+        # GH 10115
+        index = MultiIndex(levels=[[0, 1], [0, 1, 2]],
+                           labels=[[0, 0, 0, 0, 1, 1, 1],
+                                   [0, 1, 2, 0, 0, 1, 2]])
+        for idx in [index,
+                    index.set_names([None, None]),
+                    index.set_names([None, 'Num']),
+                    index.set_names(['Upper','Num']),
+                    ]:
+            self.assertTrue(idx.has_duplicates)
+            self.assertEqual(idx.drop_duplicates().names, idx.names)
+
     def test_tolist(self):
         result = self.index.tolist()
         exp = list(self.index.values)
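The MultiIndex case asserted by test_duplicate_meta_data can also be checked directly. The snippet below mirrors it, except that it uses the codes= keyword of current pandas where the test above uses labels=, and sets the names up front; treat it as a sketch rather than part of the commit.

    import pandas as pd

    # Mirror of test_duplicate_meta_data: drop_duplicates should keep the
    # level names of a MultiIndex (GH 10115).
    mi = pd.MultiIndex(levels=[[0, 1], [0, 1, 2]],
                       codes=[[0, 0, 0, 0, 1, 1, 1],
                              [0, 1, 2, 0, 0, 1, 2]],
                       names=['Upper', 'Num'])

    assert mi.has_duplicates
    assert mi.drop_duplicates().names == mi.names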

pandas/tseries/tests/test_base.py (+40)
@@ -330,6 +330,20 @@ def test_getitem(self):
         self.assert_index_equal(result, expected)
         self.assertEqual(result.freq, expected.freq)
 
+    def test_drop_duplicates_metadata(self):
+        #GH 10115
+        idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
+        result = idx.drop_duplicates()
+        self.assert_index_equal(idx, result)
+        self.assertEqual(idx.freq, result.freq)
+
+        idx_dup = idx.append(idx)
+        self.assertIsNone(idx_dup.freq)  # freq is reset
+        result = idx_dup.drop_duplicates()
+        self.assert_index_equal(idx, result)
+        self.assertIsNone(result.freq)
+
+
 class TestTimedeltaIndexOps(Ops):
 
     def setUp(self):
@@ -802,6 +816,20 @@ def test_getitem(self):
         self.assert_index_equal(result, expected)
         self.assertEqual(result.freq, expected.freq)
 
+    def test_drop_duplicates_metadata(self):
+        #GH 10115
+        idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx')
+        result = idx.drop_duplicates()
+        self.assert_index_equal(idx, result)
+        self.assertEqual(idx.freq, result.freq)
+
+        idx_dup = idx.append(idx)
+        self.assertIsNone(idx_dup.freq)  # freq is reset
+        result = idx_dup.drop_duplicates()
+        self.assert_index_equal(idx, result)
+        self.assertIsNone(result.freq)
+
+
 class TestPeriodIndexOps(Ops):
 
     def setUp(self):
@@ -1228,6 +1256,18 @@ def test_value_counts_unique(self):
 
         tm.assert_index_equal(idx.unique(), exp_idx)
 
+    def test_drop_duplicates_metadata(self):
+        #GH 10115
+        idx = pd.period_range('2011-01-01', '2011-01-31', freq='D', name='idx')
+        result = idx.drop_duplicates()
+        self.assert_index_equal(idx, result)
+        self.assertEqual(idx.freq, result.freq)
+
+        idx_dup = idx.append(idx)  # freq will not be reset
+        result = idx_dup.drop_duplicates()
+        self.assert_index_equal(idx, result)
+        self.assertEqual(idx.freq, result.freq)
+
 
 if __name__ == '__main__':
     import nose
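What these three tests assert can be summarized in a short sketch (the dates and the 'idx' name are the same arbitrary choices the tests make): appending a DatetimeIndex or TimedeltaIndex to itself resets the inferred freq, and drop_duplicates then restores the original values and name while freq stays None; for a PeriodIndex the freq is intrinsic to the index and is not reset.

    import pandas as pd

    # DatetimeIndex: duplicating resets freq; drop_duplicates keeps name/values.
    idx = pd.date_range('2011-01-01', '2011-01-31', freq='D', name='idx')
    idx_dup = idx.append(idx)
    assert idx_dup.freq is None          # freq is reset by the append

    result = idx_dup.drop_duplicates()
    assert result.equals(idx)            # values restored
    assert result.name == 'idx'          # name preserved (GH 10115)
    assert result.freq is None           # freq stays lost

    # PeriodIndex: freq is part of the index itself and is not reset.
    pidx = pd.period_range('2011-01-01', '2011-01-31', freq='D', name='idx')
    assert pidx.append(pidx).drop_duplicates().freq == pidx.freq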
