Skip to content

Commit 8a8fa8d

Browse files
tomspurThomas Spura
authored and
Thomas Spura
committed
Series.value_counts: Preserve original ordering
Ensure that value_counts returns the indices in the same order as the input object when sorting the values, regardless of whether the sort is ascending or descending. This fixes pandas-dev#12679.
1 parent ca85a41 commit 8a8fa8d

File tree

3 files changed

+70
-15
lines changed

3 files changed

+70
-15
lines changed

pandas/core/algorithms.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
is_sparse, is_timedelta64_dtype, is_unsigned_integer_dtype,
2626
needs_i8_conversion)
2727
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
28-
from pandas.core.dtypes.missing import isna, na_value_for_dtype
28+
from pandas.core.dtypes.missing import isnull, isna, na_value_for_dtype
2929

3030
from pandas.core import common as com
3131

@@ -706,6 +706,14 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
706706
keys = Index(keys)
707707
result = Series(counts, index=keys, name=name)
708708

709+
# Reorder the result so its index follows the order of unique(values)
710+
if isinstance(keys, Index) and isnull(values).sum() == 0:
711+
result = result.reindex(unique(values))
712+
if dropna:
713+
result = result.dropna()
714+
if len(counts) > 0:
715+
result = result.astype(type(counts[0]))
716+
709717
if sort:
710718
result = result.sort_values(ascending=ascending)
711719

pandas/core/series.py

+3-14
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
4343
from pandas.core.internals import SingleBlockManager
4444
from pandas.core.internals.construction import sanitize_array
45+
from pandas.core.sorting import nargsort
4546
from pandas.core.strings import StringMethods
4647
from pandas.core.tools.datetimes import to_datetime
4748

@@ -2660,16 +2661,6 @@ def sort_values(self, axis=0, ascending=True, inplace=False,
26602661
raise ValueError("This Series is a view of some other array, to "
26612662
"sort in-place you must create a copy")
26622663

2663-
def _try_kind_sort(arr):
2664-
# easier to ask forgiveness than permission
2665-
try:
2666-
# if kind==mergesort, it can fail for object dtype
2667-
return arr.argsort(kind=kind)
2668-
except TypeError:
2669-
# stable sort not available for object dtype
2670-
# uses the argsort default quicksort
2671-
return arr.argsort(kind='quicksort')
2672-
26732664
arr = self._values
26742665
sortedIdx = np.empty(len(self), dtype=np.int32)
26752666

@@ -2678,8 +2669,6 @@ def _try_kind_sort(arr):
26782669
good = ~bad
26792670
idx = ibase.default_index(len(self))
26802671

2681-
argsorted = _try_kind_sort(arr[good])
2682-
26832672
if is_list_like(ascending):
26842673
if len(ascending) != 1:
26852674
raise ValueError('Length of ascending (%d) must be 1 '
@@ -2689,8 +2678,8 @@ def _try_kind_sort(arr):
26892678
if not is_bool(ascending):
26902679
raise ValueError('ascending must be boolean')
26912680

2692-
if not ascending:
2693-
argsorted = argsorted[::-1]
2681+
argsorted = nargsort(arr[good], kind=kind, ascending=ascending,
2682+
na_position=na_position)
26942683

26952684
if na_position == 'last':
26962685
n = good.sum()
+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from pandas import Series
2+
import pandas.util.testing as tm
3+
4+
5+
def test_original_ordering_value_counts():
6+
# All items occur exactly once. No matter if sorted or not, the resulting
7+
# values should be in the same order.
8+
s = Series(list('bacdef'))
9+
10+
# Guarantee the same index if value_counts(sort=False) is used
11+
vc = s.value_counts(sort=False, ascending=False)
12+
tm.assert_series_equal(Series(vc.index), s)
13+
14+
vc = s.value_counts(sort=True, ascending=False)
15+
tm.assert_series_equal(Series(vc.index), s)
16+
17+
18+
def test_original_ordering_value_counts2():
19+
# 'a' is there twice. Sorted, it should be there at the top, unsorted it
20+
# should stay where it is.
21+
s = Series(list('bacaef'))
22+
ref_nonsorted = Series(list('bacef'))
23+
ref_sorted = Series(list('abcef'))
24+
25+
# Guarantee the same index if value_counts(sort=False) is used
26+
vc = s.value_counts(sort=False, ascending=False)
27+
tm.assert_series_equal(Series(vc.index), ref_nonsorted)
28+
29+
vc = s.value_counts(sort=True, ascending=False)
30+
tm.assert_series_equal(Series(vc.index), ref_sorted)
31+
32+
33+
def test_original_ordering_value_counts_ascending():
34+
# All items occur exactly once. No matter if sorted or not, the resulting
35+
# values should be in the same order.
36+
s = Series(list('bacdef'))
37+
38+
# Guarantee the same index if value_counts(sort=False) is used
39+
vc = s.value_counts(sort=False, ascending=True)
40+
tm.assert_series_equal(Series(vc.index), s)
41+
42+
vc = s.value_counts(sort=True, ascending=True)
43+
tm.assert_series_equal(Series(vc.index), s)
44+
45+
46+
def test_original_ordering_value_counts_ascending2():
47+
# 'a' is there twice. Sorted, it should be there at the bottom, unsorted
48+
# it should stay where it is.
49+
s = Series(list('bacaef'))
50+
ref_nonsorted = Series(list('bacef'))
51+
ref_sorted = Series(list('bcefa'))
52+
53+
# Guarantee the same index if value_counts(sort=False) is used
54+
vc = s.value_counts(sort=False, ascending=True)
55+
tm.assert_series_equal(Series(vc.index), ref_nonsorted)
56+
57+
vc = s.value_counts(sort=True, ascending=True)
58+
tm.assert_series_equal(Series(vc.index), ref_sorted)

0 commit comments

Comments
 (0)