Skip to content

Commit 654e739

Browse files
committed
Merge pull request #10419 from sinhrks/base_name_handling
BUG: Fix value_counts name handling
2 parents 3908ad5 + b6a9309 commit 654e739

File tree

5 files changed

+111
-84
lines changed

5 files changed

+111
-84
lines changed

doc/source/whatsnew/v0.17.0.txt

+4
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ Performance Improvements
7373

7474
Bug Fixes
7575
~~~~~~~~~
76+
7677
- Bug in ``DataFrame.apply`` when function returns categorical series. (:issue:`9573`)
7778

7879

@@ -100,3 +101,6 @@ Bug Fixes
100101

101102

102103
- Bug that caused segfault when resampling an empty Series (:issue:`10228`)
104+
- Bug in ``DatetimeIndex.value_counts`` and ``PeriodIndex.value_counts`` where the name was dropped from the resulting ``Series`` but retained on the result's ``Index``. (:issue:`10150`)
105+
106+

pandas/core/algorithms.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
202202
from pandas.tools.tile import cut
203203
from pandas.tseries.period import PeriodIndex
204204

205+
name = getattr(values, 'name', None)
205206
values = Series(values).values
206207

207208
if bins is not None:
@@ -222,7 +223,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
222223
if com.is_datetime_or_timedelta_dtype(dtype) or is_period:
223224

224225
if is_period:
225-
values = PeriodIndex(values)
226+
values = PeriodIndex(values, name=name)
226227

227228
values = values.view(np.int64)
228229
keys, counts = htable.value_count_int64(values)
@@ -247,7 +248,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
247248
keys = np.insert(keys, 0, np.NaN)
248249
counts = np.insert(counts, 0, mask.sum())
249250

250-
result = Series(counts, index=com._values_from_object(keys))
251+
result = Series(counts, index=com._values_from_object(keys), name=name)
251252

252253
if bins is not None:
253254
# TODO: This next line should be more efficient

pandas/core/base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -431,10 +431,10 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
431431

432432
if isinstance(self, PeriodIndex):
433433
# preserve freq
434-
result.index = self._simple_new(result.index.values, self.name,
434+
result.index = self._simple_new(result.index.values,
435435
freq=self.freq)
436436
elif isinstance(self, DatetimeIndex):
437-
result.index = self._simple_new(result.index.values, self.name,
437+
result.index = self._simple_new(result.index.values,
438438
tz=getattr(self, 'tz', None))
439439
return result
440440

pandas/tests/test_base.py

+72-50
Original file line numberDiff line numberDiff line change
@@ -181,23 +181,24 @@ def f():
181181

182182
class Ops(tm.TestCase):
183183
def setUp(self):
184-
self.bool_index = tm.makeBoolIndex(10)
185-
self.int_index = tm.makeIntIndex(10)
186-
self.float_index = tm.makeFloatIndex(10)
187-
self.dt_index = tm.makeDateIndex(10)
188-
self.dt_tz_index = tm.makeDateIndex(10).tz_localize(tz='US/Eastern')
189-
self.period_index = tm.makePeriodIndex(10)
190-
self.string_index = tm.makeStringIndex(10)
184+
self.bool_index = tm.makeBoolIndex(10, name='a')
185+
self.int_index = tm.makeIntIndex(10, name='a')
186+
self.float_index = tm.makeFloatIndex(10, name='a')
187+
self.dt_index = tm.makeDateIndex(10, name='a')
188+
self.dt_tz_index = tm.makeDateIndex(10, name='a').tz_localize(tz='US/Eastern')
189+
self.period_index = tm.makePeriodIndex(10, name='a')
190+
self.string_index = tm.makeStringIndex(10, name='a')
191+
self.unicode_index = tm.makeUnicodeIndex(10, name='a')
191192

192193
arr = np.random.randn(10)
193-
self.int_series = Series(arr, index=self.int_index)
194-
self.float_series = Series(arr, index=self.float_index)
195-
self.dt_series = Series(arr, index=self.dt_index)
194+
self.int_series = Series(arr, index=self.int_index, name='a')
195+
self.float_series = Series(arr, index=self.float_index, name='a')
196+
self.dt_series = Series(arr, index=self.dt_index, name='a')
196197
self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True)
197-
self.period_series = Series(arr, index=self.period_index)
198-
self.string_series = Series(arr, index=self.string_index)
198+
self.period_series = Series(arr, index=self.period_index, name='a')
199+
self.string_series = Series(arr, index=self.string_index, name='a')
199200

200-
types = ['bool','int','float','dt', 'dt_tz', 'period','string']
201+
types = ['bool','int','float','dt', 'dt_tz', 'period','string', 'unicode']
201202
fmts = [ "{0}_{1}".format(t,f) for t in types for f in ['index','series'] ]
202203
self.objs = [ getattr(self,f) for f in fmts if getattr(self,f,None) is not None ]
203204

@@ -213,9 +214,9 @@ def check_ops_properties(self, props, filter=None, ignore_failures=False):
213214

214215
try:
215216
if isinstance(o, Series):
216-
expected = Series(getattr(o.index,op),index=o.index)
217+
expected = Series(getattr(o.index,op), index=o.index, name='a')
217218
else:
218-
expected = getattr(o,op)
219+
expected = getattr(o, op)
219220
except (AttributeError):
220221
if ignore_failures:
221222
continue
@@ -361,21 +362,28 @@ def test_value_counts_unique_nunique(self):
361362
# create repeated values, 'n'th element is repeated by n+1 times
362363
if isinstance(o, PeriodIndex):
363364
# freq must be specified because repeat makes freq ambiguous
364-
expected_index = o[::-1]
365-
o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq)
365+
366+
# resets name from Index
367+
expected_index = pd.Index(o[::-1], name=None)
368+
369+
# attach name to klass
370+
o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq, name='a')
366371
# don't test boolean
367372
elif isinstance(o,Index) and o.is_boolean():
368373
continue
369374
elif isinstance(o, Index):
370-
expected_index = values[::-1]
371-
o = klass(np.repeat(values, range(1, len(o) + 1)))
375+
expected_index = pd.Index(values[::-1], name=None)
376+
o = klass(np.repeat(values, range(1, len(o) + 1)), name='a')
372377
else:
373-
expected_index = values[::-1]
378+
expected_index = pd.Index(values[::-1], name=None)
374379
idx = np.repeat(o.index.values, range(1, len(o) + 1))
375-
o = klass(np.repeat(values, range(1, len(o) + 1)), index=idx)
380+
o = klass(np.repeat(values, range(1, len(o) + 1)), index=idx, name='a')
376381

377-
expected_s = Series(range(10, 0, -1), index=expected_index, dtype='int64')
378-
tm.assert_series_equal(o.value_counts(), expected_s)
382+
expected_s = Series(range(10, 0, -1), index=expected_index, dtype='int64', name='a')
383+
result = o.value_counts()
384+
tm.assert_series_equal(result, expected_s)
385+
self.assertTrue(result.index.name is None)
386+
self.assertEqual(result.name, 'a')
379387

380388
result = o.unique()
381389
if isinstance(o, (DatetimeIndex, PeriodIndex)):
@@ -410,21 +418,34 @@ def test_value_counts_unique_nunique(self):
410418
# create repeated values, 'n'th element is repeated by n+1 times
411419
if isinstance(o, PeriodIndex):
412420
# freq must be specified because repeat makes freq ambiguous
413-
expected_index = o
414-
o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq)
421+
422+
# resets name from Index
423+
expected_index = pd.Index(o, name=None)
424+
# attach name to klass
425+
o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq, name='a')
415426
elif isinstance(o, Index):
416-
expected_index = values
417-
o = klass(np.repeat(values, range(1, len(o) + 1)))
427+
expected_index = pd.Index(values, name=None)
428+
o = klass(np.repeat(values, range(1, len(o) + 1)), name='a')
418429
else:
419-
expected_index = values
430+
expected_index = pd.Index(values, name=None)
420431
idx = np.repeat(o.index.values, range(1, len(o) + 1))
421-
o = klass(np.repeat(values, range(1, len(o) + 1)), index=idx)
422-
423-
expected_s_na = Series(list(range(10, 2, -1)) +[3], index=expected_index[9:0:-1], dtype='int64')
424-
expected_s = Series(list(range(10, 2, -1)), index=expected_index[9:1:-1], dtype='int64')
425-
426-
tm.assert_series_equal(o.value_counts(dropna=False), expected_s_na)
432+
o = klass(np.repeat(values, range(1, len(o) + 1)), index=idx, name='a')
433+
434+
expected_s_na = Series(list(range(10, 2, -1)) +[3],
435+
index=expected_index[9:0:-1],
436+
dtype='int64', name='a')
437+
expected_s = Series(list(range(10, 2, -1)),
438+
index=expected_index[9:1:-1],
439+
dtype='int64', name='a')
440+
441+
result_s_na = o.value_counts(dropna=False)
442+
tm.assert_series_equal(result_s_na, expected_s_na)
443+
self.assertTrue(result_s_na.index.name is None)
444+
self.assertEqual(result_s_na.name, 'a')
445+
result_s = o.value_counts()
427446
tm.assert_series_equal(o.value_counts(), expected_s)
447+
self.assertTrue(result_s.index.name is None)
448+
self.assertEqual(result_s.name, 'a')
428449

429450
# numpy_array_equal cannot compare arrays includes nan
430451
result = o.unique()
@@ -508,14 +529,15 @@ def test_value_counts_inferred(self):
508529
df = pd.read_fwf(f, widths=[6, 8, 3], names=["person_id", "dt", "food"],
509530
parse_dates=["dt"])
510531

511-
s = klass(df['dt'].copy())
532+
s = klass(df['dt'].copy(), name='dt')
512533

513-
idx = pd.to_datetime(['2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z', '2009-01-01 00:00:00X'])
514-
expected_s = Series([3, 2, 1], index=idx)
534+
idx = pd.to_datetime(['2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
535+
'2009-01-01 00:00:00X'])
536+
expected_s = Series([3, 2, 1], index=idx, name='dt')
515537
tm.assert_series_equal(s.value_counts(), expected_s)
516538

517-
expected = np.array(['2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z', '2008-09-09 00:00:00Z'],
518-
dtype='datetime64[ns]')
539+
expected = np.array(['2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z',
540+
'2008-09-09 00:00:00Z'], dtype='datetime64[ns]')
519541
if isinstance(s, DatetimeIndex):
520542
expected = DatetimeIndex(expected)
521543
self.assertTrue(s.unique().equals(expected))
@@ -526,7 +548,7 @@ def test_value_counts_inferred(self):
526548

527549
# with NaT
528550
s = df['dt'].copy()
529-
s = klass([v for v in s.values] + [pd.NaT])
551+
s = klass([v for v in s.values] + [pd.NaT], name='dt')
530552

531553
result = s.value_counts()
532554
self.assertEqual(result.index.dtype, 'datetime64[ns]')
@@ -547,10 +569,10 @@ def test_value_counts_inferred(self):
547569

548570
# timedelta64[ns]
549571
td = df.dt - df.dt + timedelta(1)
550-
td = klass(td)
572+
td = klass(td, name='dt')
551573

552574
result = td.value_counts()
553-
expected_s = Series([6], index=[Timedelta('1day')])
575+
expected_s = Series([6], index=[Timedelta('1day')], name='dt')
554576
tm.assert_series_equal(result, expected_s)
555577

556578
expected = TimedeltaIndex(['1 days'])
@@ -560,9 +582,8 @@ def test_value_counts_inferred(self):
560582
self.assert_numpy_array_equal(td.unique(), expected.values)
561583

562584
td2 = timedelta(1) + (df.dt - df.dt)
563-
td2 = klass(td2)
585+
td2 = klass(td2, name='dt')
564586
result2 = td2.value_counts()
565-
566587
tm.assert_series_equal(result2, expected_s)
567588

568589
def test_factorize(self):
@@ -629,7 +650,7 @@ def test_duplicated_drop_duplicates(self):
629650
# special case
630651
if original.is_boolean():
631652
result = original.drop_duplicates()
632-
expected = Index([False,True])
653+
expected = Index([False,True], name='a')
633654
tm.assert_index_equal(result, expected)
634655
continue
635656

@@ -668,25 +689,26 @@ def test_duplicated_drop_duplicates(self):
668689
idx.drop_duplicates(inplace=True)
669690

670691
else:
671-
expected = Series([False] * len(original), index=original.index)
692+
expected = Series([False] * len(original),
693+
index=original.index, name='a')
672694
tm.assert_series_equal(original.duplicated(), expected)
673695
result = original.drop_duplicates()
674696
tm.assert_series_equal(result, original)
675697
self.assertFalse(result is original)
676698

677699
idx = original.index[list(range(len(original))) + [5, 3]]
678700
values = original.values[list(range(len(original))) + [5, 3]]
679-
s = Series(values, index=idx)
701+
s = Series(values, index=idx, name='a')
680702

681-
expected = Series([False] * len(original) + [True, True], index=idx)
703+
expected = Series([False] * len(original) + [True, True],
704+
index=idx, name='a')
682705
tm.assert_series_equal(s.duplicated(), expected)
683706
tm.assert_series_equal(s.drop_duplicates(), original)
684707

685708
last_base = [False] * len(idx)
686709
last_base[3] = True
687710
last_base[5] = True
688-
expected = Series(last_base, index=idx)
689-
expected
711+
expected = Series(last_base, index=idx, name='a')
690712
tm.assert_series_equal(s.duplicated(take_last=True), expected)
691713
tm.assert_series_equal(s.drop_duplicates(take_last=True),
692714
s[~np.array(last_base)])

pandas/util/testing.py

+30-30
Original file line numberDiff line numberDiff line change
@@ -817,43 +817,43 @@ def getArangeMat():
817817

818818

819819
# make index
820-
def makeStringIndex(k=10):
821-
return Index(rands_array(nchars=10, size=k))
820+
def makeStringIndex(k=10, name=None):
821+
return Index(rands_array(nchars=10, size=k), name=name)
822822

823823

824-
def makeUnicodeIndex(k=10):
824+
def makeUnicodeIndex(k=10, name=None):
    """Return an Index of ``k`` random unicode strings, each 10 characters long.

    Parameters
    ----------
    k : int, default 10
        Number of elements in the index.
    name : object, default None
        Name to attach to the resulting Index.
    """
    # Forward ``name`` so callers such as ``makeUnicodeIndex(10, name='a')``
    # actually get a named index, consistent with the other make*Index helpers.
    return Index(randu_array(nchars=10, size=k), name=name)
826826

827-
def makeCategoricalIndex(k=10, n=3):
827+
def makeCategoricalIndex(k=10, n=3, name=None):
828828
""" make a length k index or n categories """
829829
x = rands_array(nchars=4, size=n)
830-
return CategoricalIndex(np.random.choice(x,k))
830+
return CategoricalIndex(np.random.choice(x,k), name=name)
831831

832-
def makeBoolIndex(k=10):
832+
def makeBoolIndex(k=10, name=None):
833833
if k == 1:
834-
return Index([True])
834+
return Index([True], name=name)
835835
elif k == 2:
836-
return Index([False,True])
837-
return Index([False,True] + [False]*(k-2))
836+
return Index([False,True], name=name)
837+
return Index([False,True] + [False]*(k-2), name=name)
838838

839-
def makeIntIndex(k=10):
840-
return Index(lrange(k))
839+
def makeIntIndex(k=10, name=None):
840+
return Index(lrange(k), name=name)
841841

842-
def makeFloatIndex(k=10):
842+
def makeFloatIndex(k=10, name=None):
843843
values = sorted(np.random.random_sample(k)) - np.random.random_sample(1)
844-
return Index(values * (10 ** np.random.randint(0, 9)))
844+
return Index(values * (10 ** np.random.randint(0, 9)), name=name)
845845

846-
def makeDateIndex(k=10, freq='B'):
846+
def makeDateIndex(k=10, freq='B', name=None):
847847
dt = datetime(2000, 1, 1)
848-
dr = bdate_range(dt, periods=k, freq=freq)
849-
return DatetimeIndex(dr)
848+
dr = bdate_range(dt, periods=k, freq=freq, name=name)
849+
return DatetimeIndex(dr, name=name)
850850

851-
def makeTimedeltaIndex(k=10, freq='D'):
852-
return TimedeltaIndex(start='1 day',periods=k,freq=freq)
851+
def makeTimedeltaIndex(k=10, freq='D', name=None):
852+
return TimedeltaIndex(start='1 day', periods=k, freq=freq, name=name)
853853

854-
def makePeriodIndex(k=10):
854+
def makePeriodIndex(k=10, name=None):
855855
dt = datetime(2000, 1, 1)
856-
dr = PeriodIndex(start=dt, periods=k, freq='B')
856+
dr = PeriodIndex(start=dt, periods=k, freq='B', name=name)
857857
return dr
858858

859859
def all_index_generator(k=10):
@@ -885,38 +885,38 @@ def all_timeseries_index_generator(k=10):
885885

886886

887887
# make series
888-
def makeFloatSeries():
888+
def makeFloatSeries(name=None):
889889
index = makeStringIndex(N)
890-
return Series(randn(N), index=index)
890+
return Series(randn(N), index=index, name=name)
891891

892892

893-
def makeStringSeries():
893+
def makeStringSeries(name=None):
894894
index = makeStringIndex(N)
895-
return Series(randn(N), index=index)
895+
return Series(randn(N), index=index, name=name)
896896

897897

898-
def makeObjectSeries():
898+
def makeObjectSeries(name=None):
899899
dateIndex = makeDateIndex(N)
900900
dateIndex = Index(dateIndex, dtype=object)
901901
index = makeStringIndex(N)
902-
return Series(dateIndex, index=index)
902+
return Series(dateIndex, index=index, name=name)
903903

904904

905905
def getSeriesData():
906906
index = makeStringIndex(N)
907907
return dict((c, Series(randn(N), index=index)) for c in getCols(K))
908908

909909

910-
def makeTimeSeries(nper=None, freq='B'):
910+
def makeTimeSeries(nper=None, freq='B', name=None):
911911
if nper is None:
912912
nper = N
913-
return Series(randn(nper), index=makeDateIndex(nper, freq=freq))
913+
return Series(randn(nper), index=makeDateIndex(nper, freq=freq), name=name)
914914

915915

916-
def makePeriodSeries(nper=None):
916+
def makePeriodSeries(nper=None, name=None):
917917
if nper is None:
918918
nper = N
919-
return Series(randn(nper), index=makePeriodIndex(nper))
919+
return Series(randn(nper), index=makePeriodIndex(nper), name=name)
920920

921921

922922
def getTimeSeriesData(nper=None, freq='B'):

0 commit comments

Comments
 (0)