From 913441b52817e76b7ea02fff1835281fb5144a43 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sun, 16 Feb 2020 22:28:04 +0100 Subject: [PATCH 1/5] started to use fixtures in TestIndexOps tests --- pandas/conftest.py | 11 +- pandas/tests/base/test_ops.py | 396 +++++++++++++++++---------------- pandas/tests/indexes/common.py | 8 +- 3 files changed, 215 insertions(+), 200 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 7851cba9cd91a..e065d10058faa 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -967,7 +967,7 @@ def __len__(self): "uint": tm.makeUIntIndex(100), "range": tm.makeRangeIndex(100), "float": tm.makeFloatIndex(100), - "bool": tm.makeBoolIndex(2), + "bool": tm.makeBoolIndex(10), "categorical": tm.makeCategoricalIndex(100), "interval": tm.makeIntervalIndex(100), "empty": Index([]), @@ -978,6 +978,7 @@ def __len__(self): @pytest.fixture(params=indices_dict.keys()) def indices(request): + """ Fixture for all kinds of indices. """ # copy to avoid mutation, e.g. setting .name return indices_dict[request.param].copy() @@ -995,6 +996,14 @@ def _create_series(index): } +@pytest.fixture(params=_series.keys()) +def series_with_differing_index(request): + """ + Fixture for tests on series with different types of indices. + """ + return _series[request.param].copy() + + _narrow_dtypes = [ np.float16, np.float32, diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 9deb56f070d56..e1cb3995214f2 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -137,227 +137,233 @@ def setup_method(self, method): self.is_valid_objs = self.objs self.not_valid_objs = [] - def test_none_comparison(self): + def test_none_comparison(self, series_with_differing_index): + o = series_with_differing_index + if isinstance(o.index, IntervalIndex): + pytest.skip("IntervalIndex is immutable") + if len(o) < 1: + pytest.skip("Test doesn't make sense on empty data") # bug brought up by #1079 # changed from TypeError in 0.17.0 - for o in self.is_valid_objs: - if isinstance(o, Series): - - o[0] = np.nan - - # noinspection PyComparisonWithNone - result = o == None # noqa - assert not result.iat[0] - assert not result.iat[1] - - # noinspection PyComparisonWithNone - result = o != None # noqa - assert result.iat[0] - assert result.iat[1] - - result = None == o # noqa - assert not result.iat[0] - assert not result.iat[1] - - result = None != o # noqa - assert result.iat[0] - assert result.iat[1] - - if is_datetime64_dtype(o) or is_datetime64tz_dtype(o): - # Following DatetimeIndex (and Timestamp) convention, - # inequality comparisons with Series[datetime64] raise - msg = "Invalid comparison" - with pytest.raises(TypeError, match=msg): - None > o - with pytest.raises(TypeError, match=msg): - o > None - else: - result = None > o - assert not result.iat[0] - assert not result.iat[1] + o[0] = np.nan + + # noinspection PyComparisonWithNone + result = o == None # noqa + assert not result.iat[0] + assert not result.iat[1] + + # noinspection PyComparisonWithNone + result = o != None # noqa + assert result.iat[0] + assert result.iat[1] + + result = None == o # noqa + assert not result.iat[0] + assert not result.iat[1] + + result = None != o # noqa + assert result.iat[0] + assert result.iat[1] + + if is_datetime64_dtype(o) or is_datetime64tz_dtype(o): + # Following DatetimeIndex (and Timestamp) convention, + # inequality comparisons with Series[datetime64] raise + msg = "Invalid comparison" + with pytest.raises(TypeError, match=msg): + None > o + with pytest.raises(TypeError, match=msg): + o > None + else: + result = None > o + assert not result.iat[0] + assert not result.iat[1] - result = o < None - assert not result.iat[0] - assert not result.iat[1] + result = o < None + assert not result.iat[0] + assert not result.iat[1] - def test_ndarray_compat_properties(self): + def test_ndarray_compat_properties(self, index_or_series_obj): + o = index_or_series_obj - for o in self.objs: - # Check that we work. - for p in ["shape", "dtype", "T", "nbytes"]: - assert getattr(o, p, None) is not None + # Check that we work. + for p in ["shape", "dtype", "T", "nbytes"]: + assert getattr(o, p, None) is not None - # deprecated properties - for p in ["flags", "strides", "itemsize", "base", "data"]: - assert not hasattr(o, p) + # deprecated properties + for p in ["flags", "strides", "itemsize", "base", "data"]: + assert not hasattr(o, p) - msg = "can only convert an array of size 1 to a Python scalar" - with pytest.raises(ValueError, match=msg): - o.item() # len > 1 + msg = "can only convert an array of size 1 to a Python scalar" + with pytest.raises(ValueError, match=msg): + o.item() # len > 1 - assert o.ndim == 1 - assert o.size == len(o) + assert o.ndim == 1 + assert o.size == len(o) assert Index([1]).item() == 1 assert Series([1]).item() == 1 - def test_value_counts_unique_nunique(self): - for orig in self.objs: - o = orig.copy() - klass = type(o) - values = o._values - - if isinstance(values, Index): - # reset name not to affect latter process - values.name = None + def test_value_counts_unique_nunique(self, index_or_series_obj): + orig = index_or_series_obj + o = orig.copy() + klass = type(o) + values = o._values - # create repeated values, 'n'th element is repeated by n+1 times - # skip boolean, because it only has 2 values at most - if isinstance(o, Index) and o.is_boolean(): - continue - elif isinstance(o, Index): - expected_index = Index(o[::-1]) - expected_index.name = None - o = o.repeat(range(1, len(o) + 1)) - o.name = "a" - else: - expected_index = Index(values[::-1]) - idx = o.index.repeat(range(1, len(o) + 1)) - # take-based repeat - indices = np.repeat(np.arange(len(o)), range(1, len(o) + 1)) - rep = values.take(indices) - o = klass(rep, index=idx, name="a") - - # check values has the same dtype as the original - assert o.dtype == orig.dtype - - expected_s = Series( - range(10, 0, -1), index=expected_index, dtype="int64", name="a" - ) + if orig.duplicated().any(): + pytest.skip("test implementation breaks with duplicated values") - result = o.value_counts() - tm.assert_series_equal(result, expected_s) - assert result.index.name is None - assert result.name == "a" + # create repeated values, 'n'th element is repeated by n+1 times + if isinstance(o, Index): + expected_index = Index(o[::-1]) + expected_index.name = None + o = o.repeat(range(1, len(o) + 1)) + else: + expected_index = Index(values[::-1]) + idx = o.index.repeat(range(1, len(o) + 1)) + # take-based repeat + indices = np.repeat(np.arange(len(o)), range(1, len(o) + 1)) + rep = values.take(indices) + o = klass(rep, index=idx) + + # check values has the same dtype as the original + assert o.dtype == orig.dtype + + expected_s = Series( + range(len(orig), 0, -1), index=expected_index, dtype="int64" + ) - result = o.unique() - if isinstance(o, Index): - assert isinstance(result, type(o)) - tm.assert_index_equal(result, orig) - assert result.dtype == orig.dtype - elif is_datetime64tz_dtype(o): - # datetimetz Series returns array of Timestamp - assert result[0] == orig[0] - for r in result: - assert isinstance(r, Timestamp) - - tm.assert_numpy_array_equal( - result.astype(object), orig._values.astype(object) - ) - else: - tm.assert_numpy_array_equal(result, orig.values) - assert result.dtype == orig.dtype + result = o.value_counts() + tm.assert_series_equal(result, expected_s) + assert result.index.name is None + + result = o.unique() + if isinstance(o, Index): + assert isinstance(result, type(o)) + tm.assert_index_equal(result, orig) + assert result.dtype == orig.dtype + elif is_datetime64tz_dtype(o): + # datetimetz Series returns array of Timestamp + assert result[0] == orig[0] + for r in result: + assert isinstance(r, Timestamp) + + tm.assert_numpy_array_equal( + result.astype(object), orig._values.astype(object) + ) + else: + tm.assert_numpy_array_equal(result, orig.values) + assert result.dtype == orig.dtype - assert o.nunique() == len(np.unique(o.values)) + # dropna=True would break for MultiIndex + assert o.nunique(dropna=False) == len(np.unique(o.values)) @pytest.mark.parametrize("null_obj", [np.nan, None]) - def test_value_counts_unique_nunique_null(self, null_obj): - - for orig in self.objs: - o = orig.copy() - klass = type(o) - values = o._ndarray_values - - if not allow_na_ops(o): - continue - - # special assign to the numpy array - if is_datetime64tz_dtype(o): - if isinstance(o, DatetimeIndex): - v = o.asi8 - v[0:2] = iNaT - values = o._shallow_copy(v) - else: - o = o.copy() - o[0:2] = pd.NaT - values = o._values - - elif needs_i8_conversion(o): - values[0:2] = iNaT - values = o._shallow_copy(values) + def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj): + orig = index_or_series_obj + o = orig.copy() + klass = type(o) + values = o._ndarray_values + num_values = len(orig) + + if not allow_na_ops(o): + pytest.skip("type doesn't allow for NA operations") + elif isinstance(orig, (pd.CategoricalIndex, pd.IntervalIndex)): + pytest.skip(f"values of {klass} cannot be changed") + elif isinstance(orig, pd.MultiIndex): + pytest.skip("MultiIndex doesn't support isna") + + # special assign to the numpy array + if is_datetime64tz_dtype(o): + if isinstance(o, DatetimeIndex): + v = o.asi8 + v[0:2] = iNaT + values = o._shallow_copy(v) else: - values[0:2] = null_obj - # check values has the same dtype as the original + o = o.copy() + o[0:2] = pd.NaT + values = o._values - assert values.dtype == o.dtype + elif needs_i8_conversion(o): + values[0:2] = iNaT + values = o._shallow_copy(values) + else: + values[0:2] = null_obj - # create repeated values, 'n'th element is repeated by n+1 - # times - if isinstance(o, (DatetimeIndex, PeriodIndex)): - expected_index = o.copy() - expected_index.name = None + # check values has the same dtype as the original + assert values.dtype == o.dtype - # attach name to klass - o = klass(values.repeat(range(1, len(o) + 1))) - o.name = "a" - else: - if isinstance(o, DatetimeIndex): - expected_index = orig._values._shallow_copy(values) - else: - expected_index = Index(values) - expected_index.name = None - o = o.repeat(range(1, len(o) + 1)) - o.name = "a" - - # check values has the same dtype as the original - assert o.dtype == orig.dtype - # check values correctly have NaN - nanloc = np.zeros(len(o), dtype=np.bool) - nanloc[:3] = True - if isinstance(o, Index): - tm.assert_numpy_array_equal(pd.isna(o), nanloc) - else: - exp = Series(nanloc, o.index, name="a") - tm.assert_series_equal(pd.isna(o), exp) - - expected_s_na = Series( - list(range(10, 2, -1)) + [3], - index=expected_index[9:0:-1], - dtype="int64", - name="a", - ) - expected_s = Series( - list(range(10, 2, -1)), - index=expected_index[9:1:-1], - dtype="int64", - name="a", - ) + # create repeated values, 'n'th element is repeated by n+1 + # times + if isinstance(o, (DatetimeIndex, PeriodIndex)): + expected_index = o.copy() + expected_index.name = None - result_s_na = o.value_counts(dropna=False) - tm.assert_series_equal(result_s_na, expected_s_na) - assert result_s_na.index.name is None - assert result_s_na.name == "a" - result_s = o.value_counts() - tm.assert_series_equal(o.value_counts(), expected_s) - assert result_s.index.name is None - assert result_s.name == "a" - - result = o.unique() - if isinstance(o, Index): - tm.assert_index_equal(result, Index(values[1:], name="a")) - elif is_datetime64tz_dtype(o): - # unable to compare NaT / nan - tm.assert_extension_array_equal(result[1:], values[2:]) - assert result[0] is pd.NaT + # attach name to klass + o = klass(values.repeat(range(1, len(o) + 1))) + o.name = "a" + else: + if isinstance(o, DatetimeIndex): + expected_index = orig._values._shallow_copy(values) else: - tm.assert_numpy_array_equal(result[1:], values[2:]) - - assert pd.isna(result[0]) - assert result.dtype == orig.dtype + expected_index = Index(values) + expected_index.name = None + o = o.repeat(range(1, len(o) + 1)) + o.name = "a" + + # check values has the same dtype as the original + assert o.dtype == orig.dtype + + # check values correctly have NaN + nanloc = np.zeros(len(o), dtype=np.bool) + nanloc[:3] = True + if isinstance(o, Index): + tm.assert_numpy_array_equal(pd.isna(o), nanloc) + else: + exp = Series(nanloc, o.index, name="a") + tm.assert_series_equal(pd.isna(o), exp) + + expected_data = list(range(num_values, 2, -1)) + expected_data_na = expected_data.copy() + if expected_data_na: + expected_data_na.append(3) + expected_s_na = Series( + expected_data_na, + index=expected_index[num_values - 1 : 0 : -1], + dtype="int64", + name="a", + ) + expected_s = Series( + expected_data, + index=expected_index[num_values - 1 : 1 : -1], + dtype="int64", + name="a", + ) - assert o.nunique() == 8 - assert o.nunique(dropna=False) == 9 + result_s_na = o.value_counts(dropna=False) + tm.assert_series_equal(result_s_na, expected_s_na) + assert result_s_na.index.name is None + assert result_s_na.name == "a" + result_s = o.value_counts() + tm.assert_series_equal(o.value_counts(), expected_s) + assert result_s.index.name is None + assert result_s.name == "a" + + result = o.unique() + if isinstance(o, Index): + tm.assert_index_equal(result, Index(values[1:], name="a")) + elif is_datetime64tz_dtype(o): + # unable to compare NaT / nan + tm.assert_extension_array_equal(result[1:], values[2:]) + assert result[0] is pd.NaT + elif len(o) > 0: + tm.assert_numpy_array_equal(result[1:], values[2:]) + + assert pd.isna(result[0]) + assert result.dtype == orig.dtype + + assert o.nunique() == max(0, num_values - 2) + assert o.nunique(dropna=False) == max(0, num_values - 1) def test_value_counts_inferred(self, index_or_series): klass = index_or_series diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 2073aa0727809..a7437b39872be 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -514,12 +514,12 @@ def test_union_base(self, indices): @pytest.mark.parametrize("sort", [None, False]) def test_difference_base(self, sort, indices): - if isinstance(indices, CategoricalIndex): - return - first = indices[2:] second = indices[:4] - answer = indices[4:] + if isinstance(indices, CategoricalIndex) or indices.is_boolean(): + answer = [] + else: + answer = indices[4:] result = first.difference(second, sort) assert tm.equalContents(result, answer) From 1deb30567afb6567e79159db9c24d7de7b4b1697 Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Mon, 17 Feb 2020 21:40:12 +0100 Subject: [PATCH 2/5] updated docstring if indices fixture --- pandas/conftest.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index e065d10058faa..81bf646f2716c 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -978,7 +978,15 @@ def __len__(self): @pytest.fixture(params=indices_dict.keys()) def indices(request): - """ Fixture for all kinds of indices. """ + """ + Fixture for many "simple" kinds of indices. + + These indices are unlikely to cover corner cases, e.g. + - no names + - no NaTs/NaNs + - no values near implementation bounds + - ... + """ # copy to avoid mutation, e.g. setting .name return indices_dict[request.param].copy() From 48e2c8833ef7c2c06ea25df4c1e8456f1669d40f Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sat, 22 Feb 2020 18:59:32 +0100 Subject: [PATCH 3/5] first batch of review comments --- pandas/conftest.py | 6 +++--- pandas/tests/base/test_ops.py | 10 +++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 81bf646f2716c..b305e87e4ec8b 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1004,12 +1004,12 @@ def _create_series(index): } -@pytest.fixture(params=_series.keys()) -def series_with_differing_index(request): +@pytest.fixture +def f8series_any_simple_index(indices): """ Fixture for tests on series with different types of indices. """ - return _series[request.param].copy() + return _create_series(indices) _narrow_dtypes = [ diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index e1cb3995214f2..9d8d9c3d79cd6 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -137,8 +137,8 @@ def setup_method(self, method): self.is_valid_objs = self.objs self.not_valid_objs = [] - def test_none_comparison(self, series_with_differing_index): - o = series_with_differing_index + def test_none_comparison(self, f8series_any_simple_index): + o = f8series_any_simple_index if isinstance(o.index, IntervalIndex): pytest.skip("IntervalIndex is immutable") if len(o) < 1: @@ -211,7 +211,11 @@ def test_value_counts_unique_nunique(self, index_or_series_obj): values = o._values if orig.duplicated().any(): - pytest.skip("test implementation breaks with duplicated values") + pytest.xfail( + "The test implementation isn't flexible enough to deal" + " with duplicated values. This isn't a bug in the" + " application code, but in the test code." + ) # create repeated values, 'n'th element is repeated by n+1 times if isinstance(o, Index): From 46e3b76351aabdae4e2f7fd4092e049f2693effd Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sat, 22 Feb 2020 19:07:14 +0100 Subject: [PATCH 4/5] adjusted skip message for test_none_comparison --- pandas/tests/base/test_ops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 9d8d9c3d79cd6..997c9a823da15 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -140,7 +140,8 @@ def setup_method(self, method): def test_none_comparison(self, f8series_any_simple_index): o = f8series_any_simple_index if isinstance(o.index, IntervalIndex): - pytest.skip("IntervalIndex is immutable") + # IntervalIndex breaks on "o[0] = np.nan" below + pytest.skip("IntervalIndex doesn't support assignment") if len(o) < 1: pytest.skip("Test doesn't make sense on empty data") From 10bab1c16c61cd6dae0cc2e95275473d24d7764d Mon Sep 17 00:00:00 2001 From: Martin Winkel Date: Sun, 23 Feb 2020 16:26:35 +0100 Subject: [PATCH 5/5] review comments --- pandas/conftest.py | 4 +- pandas/tests/base/test_ops.py | 146 +++++++++++++++++----------------- 2 files changed, 75 insertions(+), 75 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index b305e87e4ec8b..0d3f8b034beb7 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1005,9 +1005,9 @@ def _create_series(index): @pytest.fixture -def f8series_any_simple_index(indices): +def series_with_simple_index(indices): """ - Fixture for tests on series with different types of indices. + Fixture for tests on series with changing types of indices. """ return _create_series(indices) diff --git a/pandas/tests/base/test_ops.py b/pandas/tests/base/test_ops.py index 997c9a823da15..625d559001e72 100644 --- a/pandas/tests/base/test_ops.py +++ b/pandas/tests/base/test_ops.py @@ -137,79 +137,79 @@ def setup_method(self, method): self.is_valid_objs = self.objs self.not_valid_objs = [] - def test_none_comparison(self, f8series_any_simple_index): - o = f8series_any_simple_index - if isinstance(o.index, IntervalIndex): - # IntervalIndex breaks on "o[0] = np.nan" below + def test_none_comparison(self, series_with_simple_index): + series = series_with_simple_index + if isinstance(series.index, IntervalIndex): + # IntervalIndex breaks on "series[0] = np.nan" below pytest.skip("IntervalIndex doesn't support assignment") - if len(o) < 1: + if len(series) < 1: pytest.skip("Test doesn't make sense on empty data") # bug brought up by #1079 # changed from TypeError in 0.17.0 - o[0] = np.nan + series[0] = np.nan # noinspection PyComparisonWithNone - result = o == None # noqa + result = series == None # noqa assert not result.iat[0] assert not result.iat[1] # noinspection PyComparisonWithNone - result = o != None # noqa + result = series != None # noqa assert result.iat[0] assert result.iat[1] - result = None == o # noqa + result = None == series # noqa assert not result.iat[0] assert not result.iat[1] - result = None != o # noqa + result = None != series # noqa assert result.iat[0] assert result.iat[1] - if is_datetime64_dtype(o) or is_datetime64tz_dtype(o): + if is_datetime64_dtype(series) or is_datetime64tz_dtype(series): # Following DatetimeIndex (and Timestamp) convention, # inequality comparisons with Series[datetime64] raise msg = "Invalid comparison" with pytest.raises(TypeError, match=msg): - None > o + None > series with pytest.raises(TypeError, match=msg): - o > None + series > None else: - result = None > o + result = None > series assert not result.iat[0] assert not result.iat[1] - result = o < None + result = series < None assert not result.iat[0] assert not result.iat[1] def test_ndarray_compat_properties(self, index_or_series_obj): - o = index_or_series_obj + obj = index_or_series_obj # Check that we work. for p in ["shape", "dtype", "T", "nbytes"]: - assert getattr(o, p, None) is not None + assert getattr(obj, p, None) is not None # deprecated properties for p in ["flags", "strides", "itemsize", "base", "data"]: - assert not hasattr(o, p) + assert not hasattr(obj, p) msg = "can only convert an array of size 1 to a Python scalar" with pytest.raises(ValueError, match=msg): - o.item() # len > 1 + obj.item() # len > 1 - assert o.ndim == 1 - assert o.size == len(o) + assert obj.ndim == 1 + assert obj.size == len(obj) assert Index([1]).item() == 1 assert Series([1]).item() == 1 def test_value_counts_unique_nunique(self, index_or_series_obj): orig = index_or_series_obj - o = orig.copy() - klass = type(o) - values = o._values + obj = orig.copy() + klass = type(obj) + values = obj._values if orig.duplicated().any(): pytest.xfail( @@ -219,35 +219,35 @@ def test_value_counts_unique_nunique(self, index_or_series_obj): ) # create repeated values, 'n'th element is repeated by n+1 times - if isinstance(o, Index): - expected_index = Index(o[::-1]) + if isinstance(obj, Index): + expected_index = Index(obj[::-1]) expected_index.name = None - o = o.repeat(range(1, len(o) + 1)) + obj = obj.repeat(range(1, len(obj) + 1)) else: expected_index = Index(values[::-1]) - idx = o.index.repeat(range(1, len(o) + 1)) + idx = obj.index.repeat(range(1, len(obj) + 1)) # take-based repeat - indices = np.repeat(np.arange(len(o)), range(1, len(o) + 1)) + indices = np.repeat(np.arange(len(obj)), range(1, len(obj) + 1)) rep = values.take(indices) - o = klass(rep, index=idx) + obj = klass(rep, index=idx) # check values has the same dtype as the original - assert o.dtype == orig.dtype + assert obj.dtype == orig.dtype expected_s = Series( range(len(orig), 0, -1), index=expected_index, dtype="int64" ) - result = o.value_counts() + result = obj.value_counts() tm.assert_series_equal(result, expected_s) assert result.index.name is None - result = o.unique() - if isinstance(o, Index): - assert isinstance(result, type(o)) + result = obj.unique() + if isinstance(obj, Index): + assert isinstance(result, type(obj)) tm.assert_index_equal(result, orig) assert result.dtype == orig.dtype - elif is_datetime64tz_dtype(o): + elif is_datetime64tz_dtype(obj): # datetimetz Series returns array of Timestamp assert result[0] == orig[0] for r in result: @@ -261,17 +261,17 @@ def test_value_counts_unique_nunique(self, index_or_series_obj): assert result.dtype == orig.dtype # dropna=True would break for MultiIndex - assert o.nunique(dropna=False) == len(np.unique(o.values)) + assert obj.nunique(dropna=False) == len(np.unique(obj.values)) @pytest.mark.parametrize("null_obj", [np.nan, None]) def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj): orig = index_or_series_obj - o = orig.copy() - klass = type(o) - values = o._ndarray_values + obj = orig.copy() + klass = type(obj) + values = obj._ndarray_values num_values = len(orig) - if not allow_na_ops(o): + if not allow_na_ops(obj): pytest.skip("type doesn't allow for NA operations") elif isinstance(orig, (pd.CategoricalIndex, pd.IntervalIndex)): pytest.skip(f"values of {klass} cannot be changed") @@ -279,54 +279,54 @@ def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj): pytest.skip("MultiIndex doesn't support isna") # special assign to the numpy array - if is_datetime64tz_dtype(o): - if isinstance(o, DatetimeIndex): - v = o.asi8 + if is_datetime64tz_dtype(obj): + if isinstance(obj, DatetimeIndex): + v = obj.asi8 v[0:2] = iNaT - values = o._shallow_copy(v) + values = obj._shallow_copy(v) else: - o = o.copy() - o[0:2] = pd.NaT - values = o._values + obj = obj.copy() + obj[0:2] = pd.NaT + values = obj._values - elif needs_i8_conversion(o): + elif needs_i8_conversion(obj): values[0:2] = iNaT - values = o._shallow_copy(values) + values = obj._shallow_copy(values) else: values[0:2] = null_obj # check values has the same dtype as the original - assert values.dtype == o.dtype + assert values.dtype == obj.dtype # create repeated values, 'n'th element is repeated by n+1 # times - if isinstance(o, (DatetimeIndex, PeriodIndex)): - expected_index = o.copy() + if isinstance(obj, (DatetimeIndex, PeriodIndex)): + expected_index = obj.copy() expected_index.name = None # attach name to klass - o = klass(values.repeat(range(1, len(o) + 1))) - o.name = "a" + obj = klass(values.repeat(range(1, len(obj) + 1))) + obj.name = "a" else: - if isinstance(o, DatetimeIndex): + if isinstance(obj, DatetimeIndex): expected_index = orig._values._shallow_copy(values) else: expected_index = Index(values) expected_index.name = None - o = o.repeat(range(1, len(o) + 1)) - o.name = "a" + obj = obj.repeat(range(1, len(obj) + 1)) + obj.name = "a" # check values has the same dtype as the original - assert o.dtype == orig.dtype + assert obj.dtype == orig.dtype # check values correctly have NaN - nanloc = np.zeros(len(o), dtype=np.bool) + nanloc = np.zeros(len(obj), dtype=np.bool) nanloc[:3] = True - if isinstance(o, Index): - tm.assert_numpy_array_equal(pd.isna(o), nanloc) + if isinstance(obj, Index): + tm.assert_numpy_array_equal(pd.isna(obj), nanloc) else: - exp = Series(nanloc, o.index, name="a") - tm.assert_series_equal(pd.isna(o), exp) + exp = Series(nanloc, obj.index, name="a") + tm.assert_series_equal(pd.isna(obj), exp) expected_data = list(range(num_values, 2, -1)) expected_data_na = expected_data.copy() @@ -345,30 +345,30 @@ def test_value_counts_unique_nunique_null(self, null_obj, index_or_series_obj): name="a", ) - result_s_na = o.value_counts(dropna=False) + result_s_na = obj.value_counts(dropna=False) tm.assert_series_equal(result_s_na, expected_s_na) assert result_s_na.index.name is None assert result_s_na.name == "a" - result_s = o.value_counts() - tm.assert_series_equal(o.value_counts(), expected_s) + result_s = obj.value_counts() + tm.assert_series_equal(obj.value_counts(), expected_s) assert result_s.index.name is None assert result_s.name == "a" - result = o.unique() - if isinstance(o, Index): + result = obj.unique() + if isinstance(obj, Index): tm.assert_index_equal(result, Index(values[1:], name="a")) - elif is_datetime64tz_dtype(o): + elif is_datetime64tz_dtype(obj): # unable to compare NaT / nan tm.assert_extension_array_equal(result[1:], values[2:]) assert result[0] is pd.NaT - elif len(o) > 0: + elif len(obj) > 0: tm.assert_numpy_array_equal(result[1:], values[2:]) assert pd.isna(result[0]) assert result.dtype == orig.dtype - assert o.nunique() == max(0, num_values - 2) - assert o.nunique(dropna=False) == max(0, num_values - 1) + assert obj.nunique() == max(0, num_values - 2) + assert obj.nunique(dropna=False) == max(0, num_values - 1) def test_value_counts_inferred(self, index_or_series): klass = index_or_series