Skip to content

Commit 0270839

Browse files
committed
Series.value_counts: Preserve original ordering when using sort=False
Ensure that value_counts returns its index in the same order as the values first appear in the input object when sort=False, regardless of whether ascending is True or False. This fixes pandas-dev#12679.
1 parent 5d134ec commit 0270839

File tree

3 files changed

+42
-1
lines changed

3 files changed

+42
-1
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1634,6 +1634,7 @@ Other
16341634

16351635
- Bug where C variables were declared with external linkage causing import errors if certain other C libraries were imported before Pandas. (:issue:`24113`)
16361636
- Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`)
1637+
- :meth:`Series.value_counts` returns the counts in the same ordering as the original series when using ``sort=False`` (:issue:`12679`)
16371638

16381639
.. _whatsnew_0.24.0.contributors:
16391640

pandas/core/algorithms.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -666,7 +666,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
666666
value_counts : Series
667667
668668
"""
669-
from pandas.core.series import Series, Index
669+
from pandas import Series, Index, CategoricalIndex
670670
name = getattr(values, 'name', None)
671671

672672
if bins is not None:
@@ -708,6 +708,10 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
708708

709709
if sort:
710710
result = result.sort_values(ascending=ascending)
711+
else:
712+
uniq = unique(values)
713+
if not isinstance(result.index, CategoricalIndex):
714+
result = result.reindex(uniq)
711715

712716
if normalize:
713717
result = result / float(counts.sum())

pandas/tests/test_algos.py

+36
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,42 @@ def test_value_counts_uint64(self):
962962
if not compat.is_platform_32bit():
963963
tm.assert_series_equal(result, expected)
964964

965+
def test_value_counts_nonsorted(self):
966+
# All items occur exactly once.
967+
# No matter if sorted or not, the resulting values should be in
968+
# the same order.
969+
s = Series(list('bacdef'))
970+
971+
# Guarantee the same index if value_counts(sort=False) is used
972+
vc = s.value_counts(sort=False, ascending=False)
973+
tm.assert_series_equal(Series(vc.index), s)
974+
vc = s.value_counts(sort=False, ascending=True)
975+
tm.assert_series_equal(Series(vc.index), s)
976+
977+
# Guarantee does not hold yet for the sort=True case
978+
# vc = s.value_counts(sort=True, ascending=False)
979+
# tm.assert_series_equal(Series(vc.index), s)
980+
# vc = s.value_counts(sort=True, ascending=True)
981+
# tm.assert_series_equal(Series(vc.index), s)
982+
983+
# 'a' is there twice. Sorted, it should be there at the top.
984+
# Unsorted it should stay where it is.
985+
s = Series(list('bacaef'))
986+
ref_nonsorted = Series(list('bacef'))
987+
ref_sorted = Series(list('abcef'))
988+
989+
# Guarantee the same index if value_counts(sort=False) is used
990+
vc = s.value_counts(sort=False, ascending=False)
991+
tm.assert_series_equal(Series(vc.index), ref_nonsorted)
992+
vc = s.value_counts(sort=False, ascending=True)
993+
tm.assert_series_equal(Series(vc.index), ref_nonsorted)
994+
995+
# Guarantee does not hold yet for the sort=True case
996+
# vc = s.value_counts(sort=True, ascending=False)
997+
# tm.assert_series_equal(Series(vc.index), ref_sorted)
998+
# vc = s.value_counts(sort=True, ascending=True)
999+
# tm.assert_series_equal(Series(vc.index), ref_sorted)
1000+
9651001

9661002
class TestDuplicated(object):
9671003

0 commit comments

Comments
 (0)