Skip to content

Commit c6ef5bd

Browse files
split test_value_counts_unique_nunique into 3 tests
1 parent 97c0ce9 commit c6ef5bd

File tree

1 file changed

+42
-23
lines changed

1 file changed

+42
-23
lines changed

pandas/tests/base/test_ops.py

+42-23
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@
2828
Series,
2929
Timedelta,
3030
TimedeltaIndex,
31-
Timestamp,
3231
)
3332
import pandas._testing as tm
3433

@@ -39,6 +38,23 @@ def allow_na_ops(obj: Any) -> bool:
3938
return not is_bool_index and obj._can_hold_na
4039

4140

41+
def multiply_values(obj):
    """
    Return a copy of ``obj`` in which the element at position ``i``
    (0-based) appears ``i + 1`` times, so later elements occur more often.

    Parameters
    ----------
    obj : pd.Index or pd.Series
        For an Index the elements themselves are repeated. For a Series
        both the values and the index labels are repeated in lockstep and
        a new object of the same class is built from them.

    Raises
    ------
    TypeError
        If ``obj`` is neither a pd.Index nor a pd.Series.
    """
    if not isinstance(obj, (pd.Index, pd.Series)):
        raise TypeError(f"Unexpected type: {type(obj)}")

    # 1, 2, ..., len(obj): how many times each position is repeated
    repeats = range(1, len(obj) + 1)

    if isinstance(obj, pd.Index):
        return obj.repeat(repeats)

    # Series: repeat positions, then pull values at those positions
    positions = np.repeat(np.arange(len(obj)), repeats)
    repeated_values = obj.values.take(positions)
    repeated_index = obj.index.repeat(repeats)
    return type(obj)(repeated_values, index=repeated_index)
56+
57+
4258
class Ops:
4359
def setup_method(self, method):
4460
self.bool_index = tm.makeBoolIndex(10, name="a")
@@ -205,7 +221,31 @@ def test_ndarray_compat_properties(self, index_or_series_obj):
205221
assert Index([1]).item() == 1
206222
assert Series([1]).item() == 1
207223

208-
def test_value_counts_unique_nunique(self, index_or_series_obj):
224+
def test_unique(self, index_or_series_obj):
    """
    ``unique()`` on an object with repeated entries must return the
    distinct values in order of first appearance.
    """
    obj = multiply_values(index_or_series_obj)
    result = obj.unique()

    # dict keys keep insertion order, giving first-seen distinct values
    distinct = list(dict.fromkeys(obj.values))

    if isinstance(obj, pd.MultiIndex):
        expected = pd.MultiIndex.from_tuples(distinct)
        expected.names = obj.names
        tm.assert_index_equal(result, expected)
        return

    if isinstance(obj, pd.Index):
        expected = pd.Index(distinct, dtype=obj.dtype)
        # NOTE(review): tz-aware expected is passed through normalize();
        # the reason is not evident from this block -- confirm
        if is_datetime64tz_dtype(obj):
            expected = expected.normalize()
        tm.assert_index_equal(result, expected)
        return

    # anything else is compared as a plain ndarray
    expected = np.array(distinct)
    tm.assert_numpy_array_equal(result, expected)
242+
243+
def test_nunique(self, index_or_series_obj):
    """
    ``nunique(dropna=False)`` must equal the length of ``unique()`` for
    the same repeated object.
    """
    repeated = multiply_values(index_or_series_obj)
    n_distinct = repeated.nunique(dropna=False)
    assert n_distinct == len(repeated.unique())
247+
248+
def test_value_counts(self, index_or_series_obj):
209249
orig = index_or_series_obj
210250
obj = orig.copy()
211251
klass = type(obj)
@@ -242,27 +282,6 @@ def test_value_counts_unique_nunique(self, index_or_series_obj):
242282
tm.assert_series_equal(result, expected_s)
243283
assert result.index.name is None
244284

245-
result = obj.unique()
246-
if isinstance(obj, Index):
247-
assert isinstance(result, type(obj))
248-
tm.assert_index_equal(result, orig)
249-
assert result.dtype == orig.dtype
250-
elif is_datetime64tz_dtype(obj):
251-
# datetimetz Series returns array of Timestamp
252-
assert result[0] == orig[0]
253-
for r in result:
254-
assert isinstance(r, Timestamp)
255-
256-
tm.assert_numpy_array_equal(
257-
result.astype(object), orig._values.astype(object)
258-
)
259-
else:
260-
tm.assert_numpy_array_equal(result, orig.values)
261-
assert result.dtype == orig.dtype
262-
263-
# dropna=True would break for MultiIndex
264-
assert obj.nunique(dropna=False) == len(np.unique(obj.values))
265-
266285
@pytest.mark.parametrize("null_obj", [np.nan, None])
267286
def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj):
268287
orig = index_or_series_obj

0 commit comments

Comments
 (0)