From c6ef5bdf5e9a4d631219027195a1c416cc06946a Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Wed, 26 Feb 2020 18:22:40 +0100 Subject: [PATCH 01/10] split test_value_counts_unique_nunique into 3 tests --- pandas/tests/base/test_ops.py | 65 ++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index f85d823cb2fac..9eb00cbd4b1d6 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -28,7 +28,6 @@ Series, Timedelta, TimedeltaIndex, - Timestamp, ) import pandas._testing as tm @@ -39,6 +38,23 @@ def allow_na_ops(obj: Any) -> bool: return not is_bool_index and obj._can_hold_na +def multiply_values(obj): + """ + Repeat values so that the previous values are ordered (increasing) + by number of occurrences + """ + klass = type(obj) + + if isinstance(obj, pd.Index): + return obj.repeat(range(1, len(obj) + 1)) + elif isinstance(obj, pd.Series): + indices = np.repeat(np.arange(len(obj)), range(1, len(obj) + 1)) + rep = obj.values.take(indices) + idx = obj.index.repeat(range(1, len(obj) + 1)) + return klass(rep, index=idx) + raise TypeError(f"Unexpected type: {klass}") + + class Ops: def setup_method(self, method): self.bool_index = tm.makeBoolIndex(10, name="a") @@ -205,7 +221,31 @@ def test_ndarray_compat_properties(self, index_or_series_obj): assert Index([1]).item() == 1 assert Series([1]).item() == 1 - def test_value_counts_unique_nunique(self, index_or_series_obj): + def test_unique(self, index_or_series_obj): + obj = multiply_values(index_or_series_obj) + result = obj.unique() + + # dict.fromkeys preserves the order + unique_values = list(dict.fromkeys(obj.values)) + if isinstance(obj, pd.MultiIndex): + expected = pd.MultiIndex.from_tuples(unique_values) + expected.names = obj.names + tm.assert_index_equal(result, expected) + elif isinstance(obj, pd.Index): + expected = pd.Index(unique_values, dtype=obj.dtype) + if is_datetime64tz_dtype(obj): + expected = expected.normalize() + tm.assert_index_equal(result, expected) + else: + expected = np.array(unique_values) + tm.assert_numpy_array_equal(result, expected) + + def test_nunique(self, index_or_series_obj): + obj = multiply_values(index_or_series_obj) + result = obj.nunique(dropna=False) + assert result == len(obj.unique()) + + def test_value_counts(self, index_or_series_obj): orig = index_or_series_obj obj = orig.copy() klass = type(obj) @@ -242,27 +282,6 @@ def test_value_counts_unique_nunique(self, index_or_series_obj): tm.assert_series_equal(result, expected_s) assert result.index.name is None - result = obj.unique() - if isinstance(obj, Index): - assert isinstance(result, type(obj)) - tm.assert_index_equal(result, orig) - assert result.dtype == orig.dtype - elif is_datetime64tz_dtype(obj): - # datetimetz Series returns array of Timestamp - assert result[0] == orig[0] - for r in result: - assert isinstance(r, Timestamp) - - tm.assert_numpy_array_equal( - result.astype(object), orig._values.astype(object) - ) - else: - tm.assert_numpy_array_equal(result, orig.values) - assert result.dtype == orig.dtype - - # dropna=True would break for MultiIndex - assert obj.nunique(dropna=False) == len(np.unique(obj.values)) - @pytest.mark.parametrize("null_obj", [np.nan, None]) def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj): orig = index_or_series_obj From 5e1ff44c77718803e855dcd3bab56783342b3c5a Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Wed, 26 Feb 2020 18:31:23 +0100 Subject: [PATCH 02/10] simplified test_value_counts --- pandas/tests/base/test_ops.py | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 9eb00cbd4b1d6..c53c01f8de98b 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -247,33 +247,19 @@ def test_nunique(self, index_or_series_obj): def test_value_counts(self, index_or_series_obj): orig = index_or_series_obj - obj = orig.copy() - klass = type(obj) - values = obj._values + obj = multiply_values(orig.copy()) if orig.duplicated().any(): + # FIXME: duplicated values should work. pytest.xfail( "The test implementation isn't flexible enough to deal" " with duplicated values. This isn't a bug in the" " application code, but in the test code." ) - # create repeated values, 'n'th element is repeated by n+1 times - if isinstance(obj, Index): - expected_index = Index(obj[::-1]) - expected_index.name = None - obj = obj.repeat(range(1, len(obj) + 1)) - else: - expected_index = Index(values[::-1]) - idx = obj.index.repeat(range(1, len(obj) + 1)) - # take-based repeat - indices = np.repeat(np.arange(len(obj)), range(1, len(obj) + 1)) - rep = values.take(indices) - obj = klass(rep, index=idx) - - # check values has the same dtype as the original - assert obj.dtype == orig.dtype - + expected_index = Index(orig.values[::-1], dtype=orig.dtype) + if is_datetime64tz_dtype(obj): + expected_index = expected_index.normalize() expected_s = Series( range(len(orig), 0, -1), index=expected_index, dtype="int64" ) From 3abe0c16e47518e0f211445f059b0b9a99973e26 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Wed, 26 Feb 2020 23:15:53 +0100 Subject: [PATCH 03/10] simplified and fixed test_value_counts --- pandas/tests/base/test_ops.py | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index c53c01f8de98b..04587cc51e3f9 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -1,3 +1,4 @@ +import collections from datetime import datetime, timedelta from io import StringIO import sys @@ -246,27 +247,15 @@ def test_nunique(self, index_or_series_obj): assert result == len(obj.unique()) def test_value_counts(self, index_or_series_obj): - orig = index_or_series_obj - obj = multiply_values(orig.copy()) - - if orig.duplicated().any(): - # FIXME: duplicated values should work. - pytest.xfail( - "The test implementation isn't flexible enough to deal" - " with duplicated values. This isn't a bug in the" - " application code, but in the test code." - ) - - expected_index = Index(orig.values[::-1], dtype=orig.dtype) - if is_datetime64tz_dtype(obj): - expected_index = expected_index.normalize() - expected_s = Series( - range(len(orig), 0, -1), index=expected_index, dtype="int64" - ) - + obj = multiply_values(index_or_series_obj) result = obj.value_counts() - tm.assert_series_equal(result, expected_s) - assert result.index.name is None + + counter = collections.Counter(obj) + expected = pd.Series(dict(counter.most_common()), dtype=int) + expected.index = expected.index.astype(obj.dtype) + if isinstance(obj, pd.MultiIndex): + expected.index = pd.Index(expected.index) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize("null_obj", [np.nan, None]) def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj): From f2a4ac6db47f3b2b8bad163144fd2f75bb0371fa Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Wed, 26 Feb 2020 23:18:51 +0100 Subject: [PATCH 04/10] renamed helper --- pandas/tests/base/test_ops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 04587cc51e3f9..cd212cfe4d669 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -39,7 +39,7 @@ def allow_na_ops(obj: Any) -> bool: return not is_bool_index and obj._can_hold_na -def multiply_values(obj): +def repeat_values(obj): """ Repeat values so that the previous values are ordered (increasing) by number of occurrences @@ -223,7 +223,7 @@ def test_ndarray_compat_properties(self, index_or_series_obj): assert Series([1]).item() == 1 def test_unique(self, index_or_series_obj): - obj = multiply_values(index_or_series_obj) + obj = repeat_values(index_or_series_obj) result = obj.unique() # dict.fromkeys preserves the order @@ -242,12 +242,12 @@ def test_unique(self, index_or_series_obj): tm.assert_numpy_array_equal(result, expected) def test_nunique(self, index_or_series_obj): - obj = multiply_values(index_or_series_obj) + obj = repeat_values(index_or_series_obj) result = obj.nunique(dropna=False) assert result == len(obj.unique()) def test_value_counts(self, index_or_series_obj): - obj = multiply_values(index_or_series_obj) + obj = repeat_values(index_or_series_obj) result = obj.value_counts() counter = collections.Counter(obj) From e9e44899dab66cd89315a242b46610fdbab3ffd1 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Thu, 27 Feb 2020 00:28:29 +0100 Subject: [PATCH 05/10] added sort_index to fix flaky tests --- pandas/tests/base/test_ops.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index cd212cfe4d669..71c03e5ccbe2d 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -251,10 +251,14 @@ def test_value_counts(self, index_or_series_obj): result = obj.value_counts() counter = collections.Counter(obj) - expected = pd.Series(dict(counter.most_common()), dtype=int) + expected = pd.Series(dict(counter.most_common()), dtype=np.int64) expected.index = expected.index.astype(obj.dtype) if isinstance(obj, pd.MultiIndex): expected.index = pd.Index(expected.index) + + # sort_index to avoid switched order when values share the same count + result = result.sort_index() + expected = expected.sort_index() tm.assert_series_equal(result, expected) @pytest.mark.parametrize("null_obj", [np.nan, None]) From 6f13349740d286e584a187692382e8c29bf39932 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Thu, 27 Feb 2020 18:26:05 +0100 Subject: [PATCH 06/10] split and simplified test_nunique_null and test_unique_null --- pandas/conftest.py | 4 +- pandas/tests/base/test_ops.py | 100 ++++++++++++++++++++++++---------- 2 files changed, 74 insertions(+), 30 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index be44e6c2b36da..03bc4ea68e9b6 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1015,7 +1015,7 @@ def _create_series(index): """ Helper for the _series dict """ size = len(index) data = np.random.randn(size) - return pd.Series(data, index=index, name="a") + return pd.Series(data, index=index) _series = { @@ -1043,7 +1043,7 @@ def series_with_simple_index(indices): np.uint32, ] _narrow_series = { - f"{dtype.__name__}-series": tm.makeFloatSeries(name="a").astype(dtype) + f"{dtype.__name__}-series": tm.makeFloatSeries().astype(dtype) for dtype in _narrow_dtypes } diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 71c03e5ccbe2d..e131d213f9e55 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -44,16 +44,9 @@ def repeat_values(obj): Repeat values so that the previous values are ordered (increasing) by number of occurrences """ - klass = type(obj) - - if isinstance(obj, pd.Index): + if isinstance(obj, (pd.Index, pd.Series)): return obj.repeat(range(1, len(obj) + 1)) - elif isinstance(obj, pd.Series): - indices = np.repeat(np.arange(len(obj)), range(1, len(obj) + 1)) - rep = obj.values.take(indices) - idx = obj.index.repeat(range(1, len(obj) + 1)) - return klass(rep, index=idx) - raise TypeError(f"Unexpected type: {klass}") + return np.repeat(obj, range(1, len(obj) + 1)) class Ops: @@ -241,10 +234,77 @@ def test_unique(self, index_or_series_obj): expected = np.array(unique_values) tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize("null_obj", [np.nan, None]) + def test_unique_null(self, null_obj, index_or_series_obj): + obj = index_or_series_obj + + if not allow_na_ops(obj): + pytest.skip("type doesn't allow for NA operations") + elif len(obj) < 1: + pytest.skip("Test doesn't make sense on empty data") + elif isinstance(obj, pd.MultiIndex): + pytest.skip("MultiIndex doesn't support isna") + + values = obj.values + if needs_i8_conversion(obj): + values[0:2] = iNaT + else: + values[0:2] = null_obj + + klass = type(obj) + obj = klass(repeat_values(values), dtype=obj.dtype) + result = obj.unique() + + unique_values_raw = dict.fromkeys(obj.values) + # because np.nan == np.nan is False, but None == None is True + # np.nan would be duplicated, whereas None wouldn't + unique_values_not_null = [ + val for val in unique_values_raw if not pd.isnull(val) + ] + unique_values = [null_obj] + unique_values_not_null + + if isinstance(obj, pd.Index): + expected = pd.Index(unique_values, dtype=obj.dtype) + if is_datetime64tz_dtype(obj): + result = result.normalize() + expected = expected.normalize() + elif isinstance(obj, pd.CategoricalIndex): + expected = expected.set_categories(unique_values_not_null) + tm.assert_index_equal(result, expected) + else: + expected = np.array(unique_values, dtype=obj.dtype) + tm.assert_numpy_array_equal(result, expected) + def test_nunique(self, index_or_series_obj): obj = repeat_values(index_or_series_obj) - result = obj.nunique(dropna=False) - assert result == len(obj.unique()) + expected = len(obj.unique()) + assert obj.nunique(dropna=False) == expected + + @pytest.mark.parametrize("null_obj", [np.nan, None]) + def test_nunique_null(self, null_obj, index_or_series_obj): + obj = index_or_series_obj + num_values = len(obj) + + if not allow_na_ops(obj): + pytest.skip("type doesn't allow for NA operations") + elif isinstance(obj, pd.MultiIndex): + pytest.skip("MultiIndex doesn't support isna") + + values = obj.values + if needs_i8_conversion(obj): + values[0:2] = iNaT + else: + values[0:2] = null_obj + + klass = type(obj) + obj = klass(repeat_values(values), dtype=obj.dtype) + + if isinstance(obj, pd.CategoricalIndex): + assert obj.nunique() == len(obj.categories) + assert obj.nunique(dropna=False) == len(obj.categories) + 1 + else: + assert obj.nunique() == max(0, num_values - 2) + assert obj.nunique(dropna=False) == max(0, num_values - 1) def test_value_counts(self, index_or_series_obj): obj = repeat_values(index_or_series_obj) @@ -262,7 +322,7 @@ def test_value_counts(self, index_or_series_obj): tm.assert_series_equal(result, expected) @pytest.mark.parametrize("null_obj", [np.nan, None]) - def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj): + def test_value_counts_null(self, null_obj, index_or_series_obj): orig = index_or_series_obj obj = orig.copy() klass = type(obj) @@ -358,22 +418,6 @@ def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj): assert result_s.index.name is None assert result_s.name == "a" - result = obj.unique() - if isinstance(obj, Index): - tm.assert_index_equal(result, Index(values[1:], name="a")) - elif is_datetime64tz_dtype(obj): - # unable to compare NaT / nan - tm.assert_extension_array_equal(result[1:], values[2:]) - assert result[0] is pd.NaT - elif len(obj) > 0: - tm.assert_numpy_array_equal(result[1:], values[2:]) - - assert pd.isna(result[0]) - assert result.dtype == orig.dtype - - assert obj.nunique() == max(0, num_values - 2) - assert obj.nunique(dropna=False) == max(0, num_values - 1) - def test_value_counts_inferred(self, index_or_series): klass = index_or_series s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"] From 411a9a011f1506c9e42d26b63522791e14e81e9f Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Thu, 27 Feb 2020 19:19:00 +0100 Subject: [PATCH 07/10] finished refactoring test_value_counts_null --- pandas/tests/base/test_ops.py | 103 +++++++--------------------------- 1 file changed, 20 insertions(+), 83 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index e131d213f9e55..a93ba9fbe038c 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -25,7 +25,6 @@ Index, Interval, IntervalIndex, - PeriodIndex, Series, Timedelta, TimedeltaIndex, @@ -283,7 +282,6 @@ def test_nunique(self, index_or_series_obj): @pytest.mark.parametrize("null_obj", [np.nan, None]) def test_nunique_null(self, null_obj, index_or_series_obj): obj = index_or_series_obj - num_values = len(obj) if not allow_na_ops(obj): pytest.skip("type doesn't allow for NA operations") @@ -303,8 +301,9 @@ def test_nunique_null(self, null_obj, index_or_series_obj): assert obj.nunique() == len(obj.categories) assert obj.nunique(dropna=False) == len(obj.categories) + 1 else: - assert obj.nunique() == max(0, num_values - 2) - assert obj.nunique(dropna=False) == max(0, num_values - 1) + num_unique_values = len(obj.unique()) + assert obj.nunique() == max(0, num_unique_values - 1) + assert obj.nunique(dropna=False) == max(0, num_unique_values) def test_value_counts(self, index_or_series_obj): obj = repeat_values(index_or_series_obj) @@ -325,98 +324,36 @@ def test_value_counts(self, index_or_series_obj): def test_value_counts_null(self, null_obj, index_or_series_obj): orig = index_or_series_obj obj = orig.copy() - klass = type(obj) - values = obj._ndarray_values - num_values = len(orig) if not allow_na_ops(obj): pytest.skip("type doesn't allow for NA operations") - elif isinstance(orig, (pd.CategoricalIndex, pd.IntervalIndex)): - pytest.skip(f"values of {klass} cannot be changed") + elif len(obj) < 1: + pytest.skip("Test doesn't make sense on empty data") elif isinstance(orig, pd.MultiIndex): pytest.skip("MultiIndex doesn't support isna") - elif orig.duplicated().any(): - pytest.xfail( - "The test implementation isn't flexible enough to deal" - " with duplicated values. This isn't a bug in the" - " application code, but in the test code." - ) - - # special assign to the numpy array - if is_datetime64tz_dtype(obj): - if isinstance(obj, DatetimeIndex): - v = obj.asi8 - v[0:2] = iNaT - values = obj._shallow_copy(v) - else: - obj = obj.copy() - obj[0:2] = pd.NaT - values = obj._values - elif needs_i8_conversion(obj): + values = obj.values + if needs_i8_conversion(obj): values[0:2] = iNaT - values = obj._shallow_copy(values) else: values[0:2] = null_obj - # check values has the same dtype as the original - assert values.dtype == obj.dtype + klass = type(obj) + obj = klass(repeat_values(values), dtype=obj.dtype) - # create repeated values, 'n'th element is repeated by n+1 - # times - if isinstance(obj, (DatetimeIndex, PeriodIndex)): - expected_index = obj.copy() - expected_index.name = None + # because np.nan == np.nan is False, but None == None is True + # np.nan would be duplicated, whereas None wouldn't + counter = collections.Counter(obj.dropna()) + expected = pd.Series(dict(counter.most_common()), dtype=np.int64) + expected.index = expected.index.astype(obj.dtype) - # attach name to klass - obj = klass(values.repeat(range(1, len(obj) + 1))) - obj.name = "a" - else: - if isinstance(obj, DatetimeIndex): - expected_index = orig._values._shallow_copy(values) - else: - expected_index = Index(values) - expected_index.name = None - obj = obj.repeat(range(1, len(obj) + 1)) - obj.name = "a" - - # check values has the same dtype as the original - assert obj.dtype == orig.dtype - - # check values correctly have NaN - nanloc = np.zeros(len(obj), dtype=np.bool) - nanloc[:3] = True - if isinstance(obj, Index): - tm.assert_numpy_array_equal(pd.isna(obj), nanloc) - else: - exp = Series(nanloc, obj.index, name="a") - tm.assert_series_equal(pd.isna(obj), exp) - - expected_data = list(range(num_values, 2, -1)) - expected_data_na = expected_data.copy() - if expected_data_na: - expected_data_na.append(3) - expected_s_na = Series( - expected_data_na, - index=expected_index[num_values - 1 : 0 : -1], - dtype="int64", - name="a", - ) - expected_s = Series( - expected_data, - index=expected_index[num_values - 1 : 1 : -1], - dtype="int64", - name="a", - ) + tm.assert_series_equal(obj.value_counts(), expected) - result_s_na = obj.value_counts(dropna=False) - tm.assert_series_equal(result_s_na, expected_s_na) - assert result_s_na.index.name is None - assert result_s_na.name == "a" - result_s = obj.value_counts() - tm.assert_series_equal(obj.value_counts(), expected_s) - assert result_s.index.name is None - assert result_s.name == "a" + # can't use expected[null_obj] = 3 as + # IntervalIndex doesn't allow assignment + new_entry = pd.Series({np.nan: 3}, dtype=np.int64) + expected = expected.append(new_entry) + tm.assert_series_equal(obj.value_counts(dropna=False), expected) def test_value_counts_inferred(self, index_or_series): klass = index_or_series From 25eb1cc6b327dd0a35717a98e4be8fa6fb2d78c6 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Thu, 27 Feb 2020 19:25:04 +0100 Subject: [PATCH 08/10] dissolved repeat_values helper function --- pandas/tests/base/test_ops.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index a93ba9fbe038c..c34893b03c50f 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -38,16 +38,6 @@ def allow_na_ops(obj: Any) -> bool: return not is_bool_index and obj._can_hold_na -def repeat_values(obj): - """ - Repeat values so that the previous values are ordered (increasing) - by number of occurrences - """ - if isinstance(obj, (pd.Index, pd.Series)): - return obj.repeat(range(1, len(obj) + 1)) - return np.repeat(obj, range(1, len(obj) + 1)) - - class Ops: def setup_method(self, method): self.bool_index = tm.makeBoolIndex(10, name="a") @@ -215,7 +205,8 @@ def test_ndarray_compat_properties(self, index_or_series_obj): assert Series([1]).item() == 1 def test_unique(self, index_or_series_obj): - obj = repeat_values(index_or_series_obj) + obj = index_or_series_obj + obj = np.repeat(obj, range(1, len(obj) + 1)) result = obj.unique() # dict.fromkeys preserves the order @@ -251,7 +242,8 @@ def test_unique_null(self, null_obj, index_or_series_obj): values[0:2] = null_obj klass = type(obj) - obj = klass(repeat_values(values), dtype=obj.dtype) + repeated_values = np.repeat(values, range(1, len(values) + 1)) + obj = klass(repeated_values, dtype=obj.dtype) result = obj.unique() unique_values_raw = dict.fromkeys(obj.values) @@ -275,7 +267,8 @@ def test_unique_null(self, null_obj, index_or_series_obj): tm.assert_numpy_array_equal(result, expected) def test_nunique(self, index_or_series_obj): - obj = repeat_values(index_or_series_obj) + obj = index_or_series_obj + obj = np.repeat(obj, range(1, len(obj) + 1)) expected = len(obj.unique()) assert obj.nunique(dropna=False) == expected @@ -295,7 +288,8 @@ def test_nunique_null(self, null_obj, index_or_series_obj): values[0:2] = null_obj klass = type(obj) - obj = klass(repeat_values(values), dtype=obj.dtype) + repeated_values = np.repeat(values, range(1, len(values) + 1)) + obj = klass(repeated_values, dtype=obj.dtype) if isinstance(obj, pd.CategoricalIndex): assert obj.nunique() == len(obj.categories) @@ -306,7 +300,8 @@ def test_nunique_null(self, null_obj, index_or_series_obj): assert obj.nunique(dropna=False) == max(0, num_unique_values) def test_value_counts(self, index_or_series_obj): - obj = repeat_values(index_or_series_obj) + obj = index_or_series_obj + obj = np.repeat(obj, range(1, len(obj) + 1)) result = obj.value_counts() counter = collections.Counter(obj) @@ -339,7 +334,8 @@ def test_value_counts_null(self, null_obj, index_or_series_obj): values[0:2] = null_obj klass = type(obj) - obj = klass(repeat_values(values), dtype=obj.dtype) + repeated_values = np.repeat(values, range(1, len(values) + 1)) + obj = klass(repeated_values, dtype=obj.dtype) # because np.nan == np.nan is False, but None == None is True # np.nan would be duplicated, whereas None wouldn't From ee942813a275bca832f460fc4345ef6a477ea14d Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Thu, 27 Feb 2020 22:40:32 +0100 Subject: [PATCH 09/10] updated skip message for MultiIndex cases --- pandas/tests/base/test_ops.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index c34893b03c50f..bc0f61483b10b 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -233,7 +233,7 @@ def test_unique_null(self, null_obj, index_or_series_obj): elif len(obj) < 1: pytest.skip("Test doesn't make sense on empty data") elif isinstance(obj, pd.MultiIndex): - pytest.skip("MultiIndex doesn't support isna") + pytest.skip(f"MultiIndex can't hold '{null_obj}'") values = obj.values if needs_i8_conversion(obj): @@ -279,7 +279,7 @@ def test_nunique_null(self, null_obj, index_or_series_obj): if not allow_na_ops(obj): pytest.skip("type doesn't allow for NA operations") elif isinstance(obj, pd.MultiIndex): - pytest.skip("MultiIndex doesn't support isna") + pytest.skip(f"MultiIndex can't hold '{null_obj}'") values = obj.values if needs_i8_conversion(obj): @@ -325,7 +325,7 @@ def test_value_counts_null(self, null_obj, index_or_series_obj): elif len(obj) < 1: pytest.skip("Test doesn't make sense on empty data") elif isinstance(orig, pd.MultiIndex): - pytest.skip("MultiIndex doesn't support isna") + pytest.skip(f"MultiIndex can't hold '{null_obj}'") values = obj.values if needs_i8_conversion(obj): From 07df52a181ba6fe57fb7de4f765a23b0fd2c6928 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Tue, 3 Mar 2020 22:31:11 +0100 Subject: [PATCH 10/10] brought back name in series fixtures --- pandas/conftest.py | 4 ++-- pandas/tests/base/test_ops.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 03bc4ea68e9b6..be44e6c2b36da 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1015,7 +1015,7 @@ def _create_series(index): """ Helper for the _series dict """ size = len(index) data = np.random.randn(size) - return pd.Series(data, index=index) + return pd.Series(data, index=index, name="a") _series = { @@ -1043,7 +1043,7 @@ def series_with_simple_index(indices): np.uint32, ] _narrow_series = { - f"{dtype.__name__}-series": tm.makeFloatSeries().astype(dtype) + f"{dtype.__name__}-series": tm.makeFloatSeries(name="a").astype(dtype) for dtype in _narrow_dtypes } diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index bc0f61483b10b..4ea99c2e37a17 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -305,7 +305,7 @@ def test_value_counts(self, index_or_series_obj): result = obj.value_counts() counter = collections.Counter(obj) - expected = pd.Series(dict(counter.most_common()), dtype=np.int64) + expected = pd.Series(dict(counter.most_common()), dtype=np.int64, name=obj.name) expected.index = expected.index.astype(obj.dtype) if isinstance(obj, pd.MultiIndex): expected.index = pd.Index(expected.index)