From 575c21dacb629dba2aa96a957f5e55d1f32f540e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 1 Sep 2023 14:50:23 +0200 Subject: [PATCH 1/2] Revert "REF: remove unnecessary na_value fixture" --- pandas/tests/extension/base/constructors.py | 6 ++-- pandas/tests/extension/base/getitem.py | 15 +++----- pandas/tests/extension/base/methods.py | 11 +++--- pandas/tests/extension/base/reshaping.py | 20 ++++------- pandas/tests/extension/base/setitem.py | 3 +- pandas/tests/extension/conftest.py | 6 ++++ pandas/tests/extension/json/test_json.py | 16 ++++----- pandas/tests/extension/test_arrow.py | 10 ++++-- pandas/tests/extension/test_sparse.py | 39 ++++++++++----------- 9 files changed, 59 insertions(+), 67 deletions(-) diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index 7215e910365cf..8828f33b7c62c 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -35,8 +35,7 @@ def test_series_constructor(self, data): if hasattr(result._mgr, "blocks"): assert isinstance(result2._mgr.blocks[0], EABackedBlock) - def test_series_constructor_no_data_with_index(self, dtype): - na_value = dtype.na_value + def test_series_constructor_no_data_with_index(self, dtype, na_value): result = pd.Series(index=[1, 2, 3], dtype=dtype) expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype) tm.assert_series_equal(result, expected) @@ -46,8 +45,7 @@ def test_series_constructor_no_data_with_index(self, dtype): expected = pd.Series([], index=pd.Index([], dtype="object"), dtype=dtype) tm.assert_series_equal(result, expected) - def test_series_constructor_scalar_na_with_index(self, dtype): - na_value = dtype.na_value + def test_series_constructor_scalar_na_with_index(self, dtype, na_value): result = pd.Series(na_value, index=[1, 2, 3], dtype=dtype) expected = pd.Series([na_value] * 3, index=[1, 2, 3], dtype=dtype) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index be16a105384c6..5f0c1b960a475 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -148,8 +148,7 @@ def test_getitem_invalid(self, data): with pytest.raises(IndexError, match=msg): data[-ub - 1] - def test_getitem_scalar_na(self, data_missing, na_cmp): - na_value = data_missing.dtype.na_value + def test_getitem_scalar_na(self, data_missing, na_cmp, na_value): result = data_missing[0] assert na_cmp(result, na_value) @@ -349,8 +348,7 @@ def test_take_sequence(self, data): assert result.iloc[1] == data[1] assert result.iloc[2] == data[3] - def test_take(self, data, na_cmp): - na_value = data.dtype.na_value + def test_take(self, data, na_value, na_cmp): result = data.take([0, -1]) assert result.dtype == data.dtype assert result[0] == data[0] @@ -363,8 +361,7 @@ def test_take(self, data, na_cmp): with pytest.raises(IndexError, match="out of bounds"): data.take([len(data) + 1]) - def test_take_empty(self, data, na_cmp): - na_value = data.dtype.na_value + def test_take_empty(self, data, na_value, na_cmp): empty = data[:0] result = empty.take([-1], allow_fill=True) @@ -396,8 +393,7 @@ def test_take_non_na_fill_value(self, data_missing): expected = arr.take([1, 1]) tm.assert_extension_array_equal(result, expected) - def test_take_pandas_style_negative_raises(self, data): - na_value = data.dtype.na_value + def test_take_pandas_style_negative_raises(self, data, na_value): with pytest.raises(ValueError, match=""): data.take([0, -2], fill_value=na_value, allow_fill=True) @@ -417,8 +413,7 @@ def test_take_series(self, data): ) tm.assert_series_equal(result, expected) - def test_reindex(self, data): - na_value = data.dtype.na_value + def test_reindex(self, data, na_value): s = pd.Series(data) result = s.reindex([0, 1, 3]) expected = pd.Series(data.take([0, 1, 3]), index=[0, 1, 3]) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 4e0bc8d804bab..16059155a7a8f 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -121,7 +121,7 @@ def test_argsort_missing(self, data_missing_for_sorting): expected = pd.Series(np.array([1, -1, 0], dtype=np.intp)) tm.assert_series_equal(result, expected) - def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting): + def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value): # GH 24382 is_bool = data_for_sorting.dtype._is_boolean @@ -154,10 +154,9 @@ def test_argmin_argmax_empty_array(self, method, data): getattr(data[:0], method)() @pytest.mark.parametrize("method", ["argmax", "argmin"]) - def test_argmin_argmax_all_na(self, method, data): + def test_argmin_argmax_all_na(self, method, data, na_value): # all missing with skipna=True is the same as empty err_msg = "attempt to get" - na_value = data.dtype.na_value data_na = type(data)._from_sequence([na_value, na_value], dtype=data.dtype) with pytest.raises(ValueError, match=err_msg): getattr(data_na, method)() @@ -544,8 +543,7 @@ def _test_searchsorted_bool_dtypes(self, data_for_sorting, as_series): sorter = np.array([1, 0]) assert data_for_sorting.searchsorted(a, sorter=sorter) == 0 - def test_where_series(self, data, as_frame): - na_value = data.dtype.na_value + def test_where_series(self, data, na_value, as_frame): assert data[0] != data[1] cls = type(data) a, b = data[:2] @@ -672,8 +670,7 @@ def test_insert_invalid_loc(self, data): data.insert(1.5, data[0]) @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame]) - def test_equals(self, data, as_series, box): - na_value = data.dtype.na_value + def test_equals(self, data, na_value, as_series, box): data2 = type(data)._from_sequence([data[0]] * len(data), dtype=data.dtype) data_na = type(data)._from_sequence([na_value] * len(data), dtype=data.dtype) diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 13ab56adfe914..a9bd12917e73e 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -72,8 +72,7 @@ def test_concat_mixed_dtypes(self, data): expected = pd.concat([df1["A"].astype("object"), df2["A"].astype("object")]) tm.assert_series_equal(result, expected) - def test_concat_columns(self, data): - na_value = data.dtype.na_value + def test_concat_columns(self, data, na_value): df1 = pd.DataFrame({"A": data[:3]}) df2 = pd.DataFrame({"B": [1, 2, 3]}) @@ -97,9 +96,8 @@ def test_concat_columns(self, data): result = pd.concat([df1["A"], df2["B"]], axis=1) tm.assert_frame_equal(result, expected) - def test_concat_extension_arrays_copy_false(self, data): + def test_concat_extension_arrays_copy_false(self, data, na_value): # GH 20756 - na_value = data.dtype.na_value df1 = pd.DataFrame({"A": data[:3]}) df2 = pd.DataFrame({"B": data[3:7]}) expected = pd.DataFrame( @@ -124,8 +122,7 @@ def test_concat_with_reindex(self, data): ) tm.assert_frame_equal(result, expected) - def test_align(self, data): - na_value = data.dtype.na_value + def test_align(self, data, na_value): a = data[:3] b = data[2:5] r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) @@ -136,8 +133,7 @@ def test_align(self, data): tm.assert_series_equal(r1, e1) tm.assert_series_equal(r2, e2) - def test_align_frame(self, data): - na_value = data.dtype.na_value + def test_align_frame(self, data, na_value): a = data[:3] b = data[2:5] r1, r2 = pd.DataFrame({"A": a}).align(pd.DataFrame({"A": b}, index=[1, 2, 3])) @@ -152,9 +148,8 @@ def test_align_frame(self, data): tm.assert_frame_equal(r1, e1) tm.assert_frame_equal(r2, e2) - def test_align_series_frame(self, data): + def test_align_series_frame(self, data, na_value): # https://github.com/pandas-dev/pandas/issues/20576 - na_value = data.dtype.na_value ser = pd.Series(data, name="a") df = pd.DataFrame({"col": np.arange(len(ser) + 1)}) r1, r2 = ser.align(df) @@ -185,7 +180,7 @@ def test_set_frame_overwrite_object(self, data): df["A"] = data assert df.dtypes["A"] == data.dtype - def test_merge(self, data): + def test_merge(self, data, na_value): # GH-20743 df1 = pd.DataFrame({"ext": data[:3], "int1": [1, 2, 3], "key": [0, 1, 2]}) df2 = pd.DataFrame({"int2": [1, 2, 3, 4], "key": [0, 0, 1, 3]}) @@ -210,8 +205,7 @@ def test_merge(self, data): "int2": [1, 2, 3, np.nan, 4], "key": [0, 0, 1, 2, 3], "ext": data._from_sequence( - [data[0], data[0], data[1], data[2], data.dtype.na_value], - dtype=data.dtype, + [data[0], data[0], data[1], data[2], na_value], dtype=data.dtype ), } ) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index c975138837e6b..fd90635deffac 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -359,8 +359,7 @@ def test_setitem_with_expansion_dataframe_column(self, data, full_indexer): tm.assert_frame_equal(result, expected) - def test_setitem_with_expansion_row(self, data): - na_value = data.dtype.na_value + def test_setitem_with_expansion_row(self, data, na_value): df = pd.DataFrame({"data": data[:1]}) df.loc[1, "data"] = data[1] diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index eb60aea7cc8c2..7b7945b15ed83 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -116,6 +116,12 @@ def na_cmp(): return operator.is_ +@pytest.fixture +def na_value(dtype): + """The scalar missing value for this type. Default dtype.na_value""" + return dtype.na_value + + @pytest.fixture def data_for_grouping(): """ diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 68a27a28b160f..9e1a4fb5da251 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -97,24 +97,24 @@ def test_from_dtype(self, data): super().test_from_dtype(data) @pytest.mark.xfail(reason="RecursionError, GH-33900") - def test_series_constructor_no_data_with_index(self, dtype): + def test_series_constructor_no_data_with_index(self, dtype, na_value): # RecursionError: maximum recursion depth exceeded in comparison rec_limit = sys.getrecursionlimit() try: # Limit to avoid stack overflow on Windows CI sys.setrecursionlimit(100) - super().test_series_constructor_no_data_with_index(dtype) + super().test_series_constructor_no_data_with_index(dtype, na_value) finally: sys.setrecursionlimit(rec_limit) @pytest.mark.xfail(reason="RecursionError, GH-33900") - def test_series_constructor_scalar_na_with_index(self, dtype): + def test_series_constructor_scalar_na_with_index(self, dtype, na_value): # RecursionError: maximum recursion depth exceeded in comparison rec_limit = sys.getrecursionlimit() try: # Limit to avoid stack overflow on Windows CI sys.setrecursionlimit(100) - super().test_series_constructor_scalar_na_with_index(dtype) + super().test_series_constructor_scalar_na_with_index(dtype, na_value) finally: sys.setrecursionlimit(rec_limit) @@ -214,19 +214,19 @@ def test_combine_first(self, data): super().test_combine_first(data) @pytest.mark.xfail(reason="broadcasting error") - def test_where_series(self, data): + def test_where_series(self, data, na_value): # Fails with # *** ValueError: operands could not be broadcast together # with shapes (4,) (4,) (0,) - super().test_where_series(data) + super().test_where_series(data, na_value) @pytest.mark.xfail(reason="Can't compare dicts.") def test_searchsorted(self, data_for_sorting): super().test_searchsorted(data_for_sorting) @pytest.mark.xfail(reason="Can't compare dicts.") - def test_equals(self, data, as_series): - super().test_equals(data, as_series) + def test_equals(self, data, na_value, as_series): + super().test_equals(data, na_value, as_series) @pytest.mark.skip("fill-value is interpreted as a dict of values") def test_fillna_copy_frame(self, data_missing): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 13d80329f4d51..2f434ed2f1a93 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -518,7 +518,9 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, reque super().test_reduce_series_numeric(data, all_numeric_reductions, skipna) @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna, request): + def test_reduce_series_boolean( + self, data, all_boolean_reductions, skipna, na_value, request + ): pa_dtype = data.dtype.pyarrow_dtype xfail_mark = pytest.mark.xfail( raises=TypeError, @@ -751,7 +753,9 @@ def test_value_counts_returns_pyarrow_int64(self, data): result = data.value_counts() assert result.dtype == ArrowDtype(pa.int64()) - def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, request): + def test_argmin_argmax( + self, data_for_sorting, data_missing_for_sorting, na_value, request + ): pa_dtype = data_for_sorting.dtype.pyarrow_dtype if pa.types.is_decimal(pa_dtype) and pa_version_under7p0: request.node.add_marker( @@ -760,7 +764,7 @@ def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, request raises=pa.ArrowNotImplementedError, ) ) - super().test_argmin_argmax(data_for_sorting, data_missing_for_sorting) + super().test_argmin_argmax(data_for_sorting, data_missing_for_sorting, na_value) @pytest.mark.parametrize( "op_name, skipna, expected", diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index f56dea3f43de7..7330e03a57daf 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -146,29 +146,29 @@ def test_concat_mixed_dtypes(self, data): def test_stack(self, data, columns, future_stack): super().test_stack(data, columns, future_stack) - def test_concat_columns(self, data): + def test_concat_columns(self, data, na_value): self._check_unsupported(data) - super().test_concat_columns(data) + super().test_concat_columns(data, na_value) - def test_concat_extension_arrays_copy_false(self, data): + def test_concat_extension_arrays_copy_false(self, data, na_value): self._check_unsupported(data) - super().test_concat_extension_arrays_copy_false(data) + super().test_concat_extension_arrays_copy_false(data, na_value) - def test_align(self, data): + def test_align(self, data, na_value): self._check_unsupported(data) - super().test_align(data) + super().test_align(data, na_value) - def test_align_frame(self, data): + def test_align_frame(self, data, na_value): self._check_unsupported(data) - super().test_align_frame(data) + super().test_align_frame(data, na_value) - def test_align_series_frame(self, data): + def test_align_series_frame(self, data, na_value): self._check_unsupported(data) - super().test_align_series_frame(data) + super().test_align_series_frame(data, na_value) - def test_merge(self, data): + def test_merge(self, data, na_value): self._check_unsupported(data) - super().test_merge(data) + super().test_merge(data, na_value) class TestGetitem(BaseSparseTests, base.BaseGetitemTests): @@ -180,9 +180,9 @@ def test_get(self, data): assert ser.get(4) == ser.iloc[2] assert ser.get(2) == ser.iloc[1] - def test_reindex(self, data): + def test_reindex(self, data, na_value): self._check_unsupported(data) - super().test_reindex(data) + super().test_reindex(data, na_value) class TestSetitem(BaseSparseTests, base.BaseSetitemTests): @@ -282,7 +282,7 @@ def test_fillna_copy_series(self, data_missing, using_copy_on_write): def test_fillna_length_mismatch(self, data_missing): super().test_fillna_length_mismatch(data_missing) - def test_where_series(self, data): + def test_where_series(self, data, na_value): assert data[0] != data[1] cls = type(data) a, b = data[:2] @@ -293,7 +293,6 @@ def test_where_series(self, data): result = ser.where(cond) new_dtype = SparseDtype("float", 0.0) - na_value = data.dtype.na_value expected = pd.Series( cls._from_sequence([a, a, na_value, na_value], dtype=new_dtype) ) @@ -317,15 +316,15 @@ def test_shift_0_periods(self, data): assert result._sparse_values[0] != result._sparse_values[1] @pytest.mark.parametrize("method", ["argmax", "argmin"]) - def test_argmin_argmax_all_na(self, method, data): + def test_argmin_argmax_all_na(self, method, data, na_value): # overriding because Sparse[int64, 0] cannot handle na_value self._check_unsupported(data) - super().test_argmin_argmax_all_na(method, data) + super().test_argmin_argmax_all_na(method, data, na_value) @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame]) - def test_equals(self, data, as_series, box): + def test_equals(self, data, na_value, as_series, box): self._check_unsupported(data) - super().test_equals(data, as_series, box) + super().test_equals(data, na_value, as_series, box) @pytest.mark.parametrize( "func, na_action, expected", From ec1df2bb1860dda0a5c018adbf037d69bc4511e6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 20 Nov 2023 08:36:10 +0100 Subject: [PATCH 2/2] add comment about removal --- pandas/tests/extension/conftest.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index d42bf4f5fe2fa..c5b1295ee4a7d 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -120,7 +120,11 @@ def na_cmp(): @pytest.fixture def na_value(dtype): - """The scalar missing value for this type. Default dtype.na_value""" + """ + The scalar missing value for this type. Default dtype.na_value. + + TODO: can be removed in 3.x (see https://github.com/pandas-dev/pandas/pull/54930) + """ return dtype.na_value