Skip to content

Commit 957d105

Browse files
committed
Expand tests
1 parent b025232 commit 957d105

File tree

20 files changed

+135
-120
lines changed

20 files changed

+135
-120
lines changed

pandas/core/algorithms.py

+2
Original file line number | Diff line number | Diff line change
@@ -727,6 +727,8 @@ def value_counts(
727727
if not isinstance(counts, np.ndarray):
728728
counts = counts.to_numpy()
729729
result = result / float(counts.sum())
730+
else:
731+
result = result.astype("Int64")
730732

731733
return result
732734

pandas/core/arrays/masked.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -259,7 +259,7 @@ def value_counts(self, dropna: bool = True) -> "Series":
259259

260260
# if we want nans, count the mask
261261
if dropna:
262-
counts = value_counts._values
262+
counts = np.array(value_counts._values, dtype=int)
263263
else:
264264
counts = np.empty(len(value_counts) + 1, dtype="int64")
265265
counts[:-1] = value_counts

pandas/core/groupby/generic.py

+10-2
Original file line number | Diff line number | Diff line change
@@ -731,7 +731,12 @@ def value_counts(
731731
)
732732

733733
if is_integer_dtype(out):
734-
out = ensure_int64(out)
734+
return Series(
735+
ensure_int64(out),
736+
index=mi,
737+
name=self._selection_name,
738+
dtype="Int64",
739+
)
735740
return Series(out, index=mi, name=self._selection_name)
736741

737742
# for compat. with libgroupby.value_counts need to ensure every
@@ -763,7 +768,10 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray:
763768
mi = MultiIndex(levels=levels, codes=codes, names=names, verify_integrity=False)
764769

765770
if is_integer_dtype(out):
766-
out = ensure_int64(out)
771+
return Series(
772+
ensure_int64(out), index=mi, name=self._selection_name, dtype="Int64"
773+
)
774+
767775
return Series(out, index=mi, name=self._selection_name)
768776

769777
def count(self) -> Series:

pandas/tests/arrays/integer/test_function.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ def test_stat_method(pandasmethname, kwargs):
9393

9494

9595
def test_value_counts_na():
96-
arr = pd.array([1, 2, 1, pd.NA], dtype="Int64")
96+
arr = pd.array([1, 2, 1, pd.NA])
9797
result = arr.value_counts(dropna=False)
9898
expected = pd.Series([2, 1, 1], index=[1, 2, pd.NA], dtype="Int64")
9999
tm.assert_series_equal(result, expected)
@@ -105,10 +105,10 @@ def test_value_counts_na():
105105

106106
def test_value_counts_empty():
107107
# https://github.com/pandas-dev/pandas/issues/33317
108-
s = pd.Series([], dtype="Int64")
108+
s = pd.Series([])
109109
result = s.value_counts()
110110
# TODO: The dtype of the index seems wrong (it's int64 for non-empty)
111-
idx = pd.Index([], dtype="object")
111+
idx = pd.Float64Index([], dtype="float64")
112112
expected = pd.Series([], index=idx, dtype="Int64")
113113
tm.assert_series_equal(result, expected)
114114

pandas/tests/arrays/string_/test_string.py

-10
Original file line number | Diff line number | Diff line change
@@ -277,13 +277,3 @@ def test_value_counts_na():
277277
result = arr.value_counts(dropna=True)
278278
expected = pd.Series([2, 1], index=["a", "b"], dtype="Int64")
279279
tm.assert_series_equal(result, expected)
280-
281-
282-
def test_normalize_value_counts():
283-
result = (
284-
pd.Series(list("abcd"), dtype="string")
285-
.value_counts(normalize=True)
286-
.sort_index()
287-
)
288-
expected = pd.Series([0.25, 0.25, 0.25, 0.25], index=["a", "b", "c", "d"])
289-
tm.assert_series_equal(expected, result)

pandas/tests/arrays/test_datetimes.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -248,7 +248,7 @@ def test_value_counts_preserves_tz(self):
248248

249249
arr[-2] = pd.NaT
250250
result = arr.value_counts()
251-
expected = pd.Series([1, 4, 2], index=[pd.NaT, dti[0], dti[1]])
251+
expected = pd.Series([1, 4, 2], index=[pd.NaT, dti[0], dti[1]], dtype="Int64")
252252
tm.assert_series_equal(result, expected)
253253

254254
@pytest.mark.parametrize("method", ["pad", "backfill"])

pandas/tests/base/test_value_counts.py

+20-18
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ def test_value_counts(index_or_series_obj):
3030
result = obj.value_counts()
3131

3232
counter = collections.Counter(obj)
33-
expected = pd.Series(dict(counter.most_common()), dtype=np.int64, name=obj.name)
33+
expected = pd.Series(dict(counter.most_common()), dtype="Int64", name=obj.name)
3434
expected.index = expected.index.astype(obj.dtype)
3535
if isinstance(obj, pd.MultiIndex):
3636
expected.index = pd.Index(expected.index)
@@ -67,7 +67,7 @@ def test_value_counts_null(null_obj, index_or_series_obj):
6767
# because np.nan == np.nan is False, but None == None is True
6868
# np.nan would be duplicated, whereas None wouldn't
6969
counter = collections.Counter(obj.dropna())
70-
expected = pd.Series(dict(counter.most_common()), dtype=np.int64)
70+
expected = pd.Series(dict(counter.most_common()), dtype="Int64")
7171
expected.index = expected.index.astype(obj.dtype)
7272

7373
result = obj.value_counts()
@@ -80,7 +80,7 @@ def test_value_counts_null(null_obj, index_or_series_obj):
8080

8181
# can't use expected[null_obj] = 3 as
8282
# IntervalIndex doesn't allow assignment
83-
new_entry = pd.Series({np.nan: 3}, dtype=np.int64)
83+
new_entry = pd.Series({np.nan: 3}, dtype="Int64")
8484
expected = expected.append(new_entry)
8585

8686
result = obj.value_counts(dropna=False)
@@ -96,7 +96,7 @@ def test_value_counts_inferred(index_or_series):
9696
klass = index_or_series
9797
s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"]
9898
s = klass(s_values)
99-
expected = Series([4, 3, 2, 1], index=["b", "a", "d", "c"])
99+
expected = Series([4, 3, 2, 1], index=["b", "a", "d", "c"], dtype="Int64")
100100
tm.assert_series_equal(s.value_counts(), expected)
101101

102102
if isinstance(s, Index):
@@ -110,17 +110,17 @@ def test_value_counts_inferred(index_or_series):
110110
# don't sort, have to sort after the fact as not sorting is
111111
# platform-dep
112112
hist = s.value_counts(sort=False).sort_values()
113-
expected = Series([3, 1, 4, 2], index=list("acbd")).sort_values()
113+
expected = Series([3, 1, 4, 2], index=list("acbd"), dtype="Int64").sort_values()
114114
tm.assert_series_equal(hist, expected)
115115

116116
# sort ascending
117117
hist = s.value_counts(ascending=True)
118-
expected = Series([1, 2, 3, 4], index=list("cdab"))
118+
expected = Series([1, 2, 3, 4], index=list("cdab"), dtype="Int64")
119119
tm.assert_series_equal(hist, expected)
120120

121121
# relative histogram.
122122
hist = s.value_counts(normalize=True)
123-
expected = Series([0.4, 0.3, 0.2, 0.1], index=["b", "a", "d", "c"])
123+
expected = Series([0.4, 0.3, 0.2, 0.1], index=["b", "a", "d", "c"], dtype="float64")
124124
tm.assert_series_equal(hist, expected)
125125

126126

@@ -136,39 +136,41 @@ def test_value_counts_bins(index_or_series):
136136

137137
s1 = Series([1, 1, 2, 3])
138138
res1 = s1.value_counts(bins=1)
139-
exp1 = Series({Interval(0.997, 3.0): 4})
139+
exp1 = Series({Interval(0.997, 3.0): 4}, dtype="Int64")
140140
tm.assert_series_equal(res1, exp1)
141141
res1n = s1.value_counts(bins=1, normalize=True)
142-
exp1n = Series({Interval(0.997, 3.0): 1.0})
142+
exp1n = Series({Interval(0.997, 3.0): 1.0}, dtype="float64")
143143
tm.assert_series_equal(res1n, exp1n)
144144

145145
if isinstance(s1, Index):
146146
tm.assert_index_equal(s1.unique(), Index([1, 2, 3]))
147147
else:
148-
exp = np.array([1, 2, 3], dtype=np.int64)
148+
exp = np.array([1, 2, 3], dtype="Int64")
149149
tm.assert_numpy_array_equal(s1.unique(), exp)
150150

151151
assert s1.nunique() == 3
152152

153153
# these return the same
154154
res4 = s1.value_counts(bins=4, dropna=True)
155155
intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0])
156-
exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]))
156+
exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]), dtype="Int64")
157157
tm.assert_series_equal(res4, exp4)
158158

159159
res4 = s1.value_counts(bins=4, dropna=False)
160160
intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0])
161-
exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]))
161+
exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]), dtype="Int64")
162162
tm.assert_series_equal(res4, exp4)
163163

164164
res4n = s1.value_counts(bins=4, normalize=True)
165-
exp4n = Series([0.5, 0.25, 0.25, 0], index=intervals.take([0, 3, 1, 2]))
165+
exp4n = Series(
166+
[0.5, 0.25, 0.25, 0], index=intervals.take([0, 3, 1, 2]), dtype="float64"
167+
)
166168
tm.assert_series_equal(res4n, exp4n)
167169

168170
# handle NA's properly
169171
s_values = ["a", "b", "b", "b", np.nan, np.nan, "d", "d", "a", "a", "b"]
170172
s = klass(s_values)
171-
expected = Series([4, 3, 2], index=["b", "a", "d"])
173+
expected = Series([4, 3, 2], index=["b", "a", "d"], dtype="Int64")
172174
tm.assert_series_equal(s.value_counts(), expected)
173175

174176
if isinstance(s, Index):
@@ -180,7 +182,7 @@ def test_value_counts_bins(index_or_series):
180182
assert s.nunique() == 3
181183

182184
s = klass({}) if klass is dict else klass({}, dtype=object)
183-
expected = Series([], dtype=np.int64)
185+
expected = Series([], dtype="Int64")
184186
tm.assert_series_equal(s.value_counts(), expected, check_index_type=False)
185187
# returned dtype differs depending on original
186188
if isinstance(s, Index):
@@ -216,7 +218,7 @@ def test_value_counts_datetime64(index_or_series):
216218
idx = pd.to_datetime(
217219
["2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00"]
218220
)
219-
expected_s = Series([3, 2, 1], index=idx)
221+
expected_s = Series([3, 2, 1], index=idx, dtype="Int64")
220222
tm.assert_series_equal(s.value_counts(), expected_s)
221223

222224
expected = np_array_datetime64_compat(
@@ -240,7 +242,7 @@ def test_value_counts_datetime64(index_or_series):
240242

241243
result = s.value_counts(dropna=False)
242244
expected_s[pd.NaT] = 1
243-
tm.assert_series_equal(result, expected_s)
245+
tm.assert_series_equal(result, expected_s.astype("Int64"))
244246

245247
unique = s.unique()
246248
assert unique.dtype == "datetime64[ns]"
@@ -261,7 +263,7 @@ def test_value_counts_datetime64(index_or_series):
261263
td = klass(td, name="dt")
262264

263265
result = td.value_counts()
264-
expected_s = Series([6], index=[Timedelta("1day")], name="dt")
266+
expected_s = Series([6], index=[Timedelta("1day")], name="dt", dtype="Int64")
265267
tm.assert_series_equal(result, expected_s)
266268

267269
expected = TimedeltaIndex(["1 days"], name="dt")

pandas/tests/extension/base/methods.py

+11-4
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,10 @@
33
import numpy as np
44
import pytest
55

6-
from pandas.core.dtypes.common import is_bool_dtype
7-
86
import pandas as pd
97
import pandas._testing as tm
8+
from pandas.core.dtypes.common import is_bool_dtype
109
from pandas.core.sorting import nargsort
11-
1210
from .base import BaseExtensionTests
1311

1412

@@ -17,7 +15,7 @@ class BaseMethodsTests(BaseExtensionTests):
1715

1816
@pytest.mark.parametrize("dropna", [True, False])
1917
def test_value_counts(self, all_data, dropna):
20-
all_data = all_data[:10]
18+
all_data = all_data[:5]
2119
if dropna:
2220
other = np.array(all_data[~all_data.isna()])
2321
else:
@@ -28,6 +26,15 @@ def test_value_counts(self, all_data, dropna):
2826

2927
self.assert_series_equal(result, expected)
3028

29+
result = (
30+
pd.Series(all_data, dtype=all_data.dtype)
31+
.value_counts(dropna=dropna, normalize=True)
32+
.sort_index()
33+
)
34+
35+
expected = pd.Series([1 / len(other)] * len(other), index=result.index)
36+
tm.assert_series_equal(expected, result)
37+
3138
def test_count(self, data_missing):
3239
df = pd.DataFrame({"A": data_missing})
3340
result = df.count(axis="columns")

pandas/tests/extension/test_datetime.py

-4
Original file line number | Diff line number | Diff line change
@@ -90,10 +90,6 @@ class TestGetitem(BaseDatetimeTests, base.BaseGetitemTests):
9090

9191

9292
class TestMethods(BaseDatetimeTests, base.BaseMethodsTests):
93-
@pytest.mark.skip(reason="Incorrect expected")
94-
def test_value_counts(self, all_data, dropna):
95-
pass
96-
9793
def test_combine_add(self, data_repeated):
9894
# Timestamp.__add__(Timestamp) not defined
9995
pass

pandas/tests/extension/test_string.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -81,9 +81,7 @@ class TestNoReduce(base.BaseNoReduceTests):
8181

8282

8383
class TestMethods(base.BaseMethodsTests):
84-
@pytest.mark.skip(reason="returns nullable")
85-
def test_value_counts(self, all_data, dropna):
86-
return super().test_value_counts(all_data, dropna)
84+
pass
8785

8886

8987
class TestCasting(base.BaseCastingTests):

pandas/tests/frame/test_api.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -443,7 +443,7 @@ def test_with_datetimelikes(self):
443443
t = df.T
444444

445445
result = t.dtypes.value_counts()
446-
expected = Series({np.dtype("object"): 10})
446+
expected = Series({np.dtype("object"): 10}, dtype="Int64")
447447
tm.assert_series_equal(result, expected)
448448

449449
def test_values(self, float_frame):

pandas/tests/frame/test_arithmetic.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -325,7 +325,8 @@ def test_df_flex_cmp_constant_return_types(self, opname):
325325
const = 2
326326

327327
result = getattr(df, opname)(const).dtypes.value_counts()
328-
tm.assert_series_equal(result, pd.Series([2], index=[np.dtype(bool)]))
328+
expected = pd.Series([2], index=[np.dtype(bool)], dtype="Int64")
329+
tm.assert_series_equal(result, expected)
329330

330331
@pytest.mark.parametrize("opname", ["eq", "ne", "gt", "lt", "ge", "le"])
331332
def test_df_flex_cmp_constant_return_types_empty(self, opname):
@@ -335,7 +336,8 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname):
335336

336337
empty = df.iloc[:0]
337338
result = getattr(empty, opname)(const).dtypes.value_counts()
338-
tm.assert_series_equal(result, pd.Series([2], index=[np.dtype(bool)]))
339+
expected = pd.Series([2], index=[np.dtype(bool)], dtype="Int64")
340+
tm.assert_series_equal(result, expected)
339341

340342

341343
# -------------------------------------------------------------------

pandas/tests/indexes/datetimes/test_ops.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,7 @@ def test_value_counts_unique(self, tz_naive_fixture):
133133
idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz)
134134

135135
exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
136-
expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
136+
expected = Series(range(10, 0, -1), index=exp_idx, dtype="Int64")
137137

138138
for obj in [idx, Series(idx)]:
139139
tm.assert_series_equal(obj.value_counts(), expected)
@@ -154,13 +154,13 @@ def test_value_counts_unique(self, tz_naive_fixture):
154154
)
155155

156156
exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz)
157-
expected = Series([3, 2], index=exp_idx)
157+
expected = Series([3, 2], index=exp_idx, dtype="Int64")
158158

159159
for obj in [idx, Series(idx)]:
160160
tm.assert_series_equal(obj.value_counts(), expected)
161161

162162
exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz)
163-
expected = Series([3, 2, 1], index=exp_idx)
163+
expected = Series([3, 2, 1], index=exp_idx, dtype="Int64")
164164

165165
for obj in [idx, Series(idx)]:
166166
tm.assert_series_equal(obj.value_counts(dropna=False), expected)

pandas/tests/indexes/period/test_ops.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def test_value_counts_unique(self):
4747
],
4848
freq="H",
4949
)
50-
expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
50+
expected = Series(range(10, 0, -1), index=exp_idx, dtype="Int64")
5151

5252
for obj in [idx, Series(idx)]:
5353
tm.assert_series_equal(obj.value_counts(), expected)
@@ -68,13 +68,13 @@ def test_value_counts_unique(self):
6868
)
6969

7070
exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00"], freq="H")
71-
expected = Series([3, 2], index=exp_idx)
71+
expected = Series([3, 2], index=exp_idx, dtype="Int64")
7272

7373
for obj in [idx, Series(idx)]:
7474
tm.assert_series_equal(obj.value_counts(), expected)
7575

7676
exp_idx = PeriodIndex(["2013-01-01 09:00", "2013-01-01 08:00", NaT], freq="H")
77-
expected = Series([3, 2, 1], index=exp_idx)
77+
expected = Series([3, 2, 1], index=exp_idx, dtype="Int64")
7878

7979
for obj in [idx, Series(idx)]:
8080
tm.assert_series_equal(obj.value_counts(dropna=False), expected)

pandas/tests/indexes/timedeltas/test_ops.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ def test_value_counts_unique(self):
1818
idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1)))
1919

2020
exp_idx = timedelta_range("1 days 18:00:00", freq="-1H", periods=10)
21-
expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
21+
expected = Series(range(10, 0, -1), index=exp_idx, dtype="Int64")
2222

2323
for obj in [idx, Series(idx)]:
2424
tm.assert_series_equal(obj.value_counts(), expected)
@@ -38,13 +38,13 @@ def test_value_counts_unique(self):
3838
)
3939

4040
exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"])
41-
expected = Series([3, 2], index=exp_idx)
41+
expected = Series([3, 2], index=exp_idx, dtype="Int64")
4242

4343
for obj in [idx, Series(idx)]:
4444
tm.assert_series_equal(obj.value_counts(), expected)
4545

4646
exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", pd.NaT])
47-
expected = Series([3, 2, 1], index=exp_idx)
47+
expected = Series([3, 2, 1], index=exp_idx, dtype="Int64")
4848

4949
for obj in [idx, Series(idx)]:
5050
tm.assert_series_equal(obj.value_counts(dropna=False), expected)

0 commit comments

Comments
 (0)