From c1463b88346b4532885dbeaeeb3874bce926afd7 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 22:11:32 +0100 Subject: [PATCH 1/3] Adjust tests in extension folder for new string option --- pandas/tests/extension/base/dtype.py | 7 ++++++- pandas/tests/extension/base/groupby.py | 7 ++++++- pandas/tests/extension/base/missing.py | 2 +- pandas/tests/extension/base/ops.py | 18 +++++++++++++----- pandas/tests/extension/base/setitem.py | 2 +- pandas/tests/extension/test_categorical.py | 6 +++++- pandas/tests/extension/test_numpy.py | 4 ++-- 7 files changed, 34 insertions(+), 12 deletions(-) diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index 5ba65ceaeeada..c7b768f6e3c88 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -59,7 +59,12 @@ def test_check_dtype(self, data): # check equivalency for using .dtypes df = pd.DataFrame( - {"A": pd.Series(data, dtype=dtype), "B": data, "C": "foo", "D": 1} + { + "A": pd.Series(data, dtype=dtype), + "B": data, + "C": pd.Series(["foo"] * len(data), dtype=object), + "D": 1, + } ) result = df.dtypes == str(dtype) assert np.dtype("int64") != "Int64" diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index 5c21c4f7137a5..4e8221f67a74d 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -21,7 +21,12 @@ class BaseGroupbyTests: def test_grouping_grouper(self, data_for_grouping): df = pd.DataFrame( - {"A": ["B", "B", None, None, "A", "A", "B", "C"], "B": data_for_grouping} + { + "A": pd.Series( + ["B", "B", None, None, "A", "A", "B", "C"], dtype=object + ), + "B": data_for_grouping, + } ) gr1 = df.groupby("A").grouper.groupings[0] gr2 = df.groupby("B").grouper.groupings[0] diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 40cc952d44200..ffb7a24b4b390 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -44,7 +44,7 @@ def test_dropna_series(self, data_missing): tm.assert_series_equal(result, expected) def test_dropna_frame(self, data_missing): - df = pd.DataFrame({"A": data_missing}) + df = pd.DataFrame({"A": data_missing}, columns=pd.Index(["A"], dtype=object)) # defaults result = df.dropna() diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 40fab5ec11d7d..6932a43c8d552 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas._config import using_pyarrow_string_dtype + from pandas.core.dtypes.common import is_string_dtype import pandas as pd @@ -27,13 +29,19 @@ def _get_expected_exception( # The self.obj_bar_exc pattern isn't great in part because it can depend # on op_name or dtypes, but we use it here for backward-compatibility. if op_name in ["__divmod__", "__rdivmod__"]: - return self.divmod_exc - if isinstance(obj, pd.Series) and isinstance(other, pd.Series): - return self.series_array_exc + result = self.divmod_exc + elif isinstance(obj, pd.Series) and isinstance(other, pd.Series): + result = self.series_array_exc elif isinstance(obj, pd.Series): - return self.series_scalar_exc + result = self.series_scalar_exc else: - return self.frame_scalar_exc + result = self.frame_scalar_exc + + if using_pyarrow_string_dtype() and result is not None: + import pyarrow as pa + + result = result, pa.lib.ArrowNotImplementedError, NotImplementedError + return result def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result): # In _check_op we check that the result of a pointwise operation diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 067b401ce2f23..4434152676ba3 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -357,7 +357,7 @@ def test_setitem_with_expansion_dataframe_column(self, data, full_indexer): key = full_indexer(df) result.loc[key, "data"] = df["data"] - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_column_type=False) def test_setitem_with_expansion_row(self, data, na_value): df = pd.DataFrame({"data": data[:1]}) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 5cde5df4bc007..6f33b18b19c51 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -18,6 +18,8 @@ import numpy as np import pytest +from pandas._config import using_pyarrow_string_dtype + import pandas as pd from pandas import Categorical import pandas._testing as tm @@ -100,7 +102,9 @@ def test_contains(self, data, data_missing): if na_value_obj is na_value: continue assert na_value_obj not in data - assert na_value_obj in data_missing # this line differs from super method + # this section suffers from super method + if not using_pyarrow_string_dtype(): + assert na_value_obj in data_missing def test_empty(self, dtype): cls = dtype.construct_array_type() diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index f1939ea174841..c0692064cfaec 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -196,7 +196,7 @@ def test_series_constructor_scalar_with_index(self, data, dtype): class TestDtype(BaseNumPyTests, base.BaseDtypeTests): - def test_check_dtype(self, data, request): + def test_check_dtype(self, data, request, using_infer_string): if data.dtype.numpy_dtype == "object": request.applymarker( pytest.mark.xfail( @@ -429,7 +429,7 @@ def test_setitem_with_expansion_dataframe_column(self, data, full_indexer): if data.dtype.numpy_dtype != object: if not isinstance(key, slice) or key != slice(None): expected = pd.DataFrame({"data": data.to_numpy()}) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_column_type=False) @skip_nested From 4989b4447b6e627b9cced8431caaaab7bf09771c Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 26 Nov 2023 23:14:17 +0100 Subject: [PATCH 2/3] Fix typing --- pandas/tests/extension/base/ops.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 6932a43c8d552..5cd66d8a874c7 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -40,7 +40,11 @@ def _get_expected_exception( if using_pyarrow_string_dtype() and result is not None: import pyarrow as pa - result = result, pa.lib.ArrowNotImplementedError, NotImplementedError + result = ( # type: ignore[assignment] + result, + pa.lib.ArrowNotImplementedError, + NotImplementedError, + ) return result def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result): From 230316f9e6b0114c4f998a8fddb4d01ce528fac0 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 27 Nov 2023 12:12:43 +0100 Subject: [PATCH 3/3] Update setitem.py --- pandas/tests/extension/base/setitem.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 4434152676ba3..187da89729f0e 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -351,13 +351,13 @@ def test_setitem_preserves_views(self, data): def test_setitem_with_expansion_dataframe_column(self, data, full_indexer): # https://github.com/pandas-dev/pandas/issues/32395 - df = expected = pd.DataFrame({"data": pd.Series(data)}) + df = expected = pd.DataFrame({0: pd.Series(data)}) result = pd.DataFrame(index=df.index) key = full_indexer(df) - result.loc[key, "data"] = df["data"] + result.loc[key, 0] = df[0] - tm.assert_frame_equal(result, expected, check_column_type=False) + tm.assert_frame_equal(result, expected) def test_setitem_with_expansion_row(self, data, na_value): df = pd.DataFrame({"data": data[:1]})