Skip to content

Adjust tests in extension folder for new string option #56191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion pandas/tests/extension/base/dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,12 @@ def test_check_dtype(self, data):

# check equivalency for using .dtypes
df = pd.DataFrame(
{"A": pd.Series(data, dtype=dtype), "B": data, "C": "foo", "D": 1}
{
"A": pd.Series(data, dtype=dtype),
"B": data,
"C": pd.Series(["foo"] * len(data), dtype=object),
"D": 1,
}
)
result = df.dtypes == str(dtype)
assert np.dtype("int64") != "Int64"
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/extension/base/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@ class BaseGroupbyTests:

def test_grouping_grouper(self, data_for_grouping):
df = pd.DataFrame(
{"A": ["B", "B", None, None, "A", "A", "B", "C"], "B": data_for_grouping}
{
"A": pd.Series(
["B", "B", None, None, "A", "A", "B", "C"], dtype=object
),
"B": data_for_grouping,
}
)
gr1 = df.groupby("A").grouper.groupings[0]
gr2 = df.groupby("B").grouper.groupings[0]
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/base/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_dropna_series(self, data_missing):
tm.assert_series_equal(result, expected)

def test_dropna_frame(self, data_missing):
df = pd.DataFrame({"A": data_missing})
df = pd.DataFrame({"A": data_missing}, columns=pd.Index(["A"], dtype=object))

# defaults
result = df.dropna()
Expand Down
22 changes: 17 additions & 5 deletions pandas/tests/extension/base/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

from pandas.core.dtypes.common import is_string_dtype

import pandas as pd
Expand All @@ -27,13 +29,23 @@ def _get_expected_exception(
# The self.obj_bar_exc pattern isn't great in part because it can depend
# on op_name or dtypes, but we use it here for backward-compatibility.
if op_name in ["__divmod__", "__rdivmod__"]:
return self.divmod_exc
if isinstance(obj, pd.Series) and isinstance(other, pd.Series):
return self.series_array_exc
result = self.divmod_exc
elif isinstance(obj, pd.Series) and isinstance(other, pd.Series):
result = self.series_array_exc
elif isinstance(obj, pd.Series):
return self.series_scalar_exc
result = self.series_scalar_exc
else:
return self.frame_scalar_exc
result = self.frame_scalar_exc

if using_pyarrow_string_dtype() and result is not None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @jbrockmendel: does this follow how you imagined `_get_expected_exception` would function?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am open to other suggestions, as long as we keep in mind that this is a temporary solution until we enable the option by default, which will make testing this a lot easier.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty ugly, but since it's temporary (and I'm way late in reviewing) I'm not inclined to worry about it. There are a bunch of similar things that I think could be improved by catching `pa.lib.ArrowNotImplementedError` and re-raising it as `TypeError` for cases that are not supposed to work.

import pyarrow as pa

result = ( # type: ignore[assignment]
result,
pa.lib.ArrowNotImplementedError,
NotImplementedError,
)
return result

def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
# In _check_op we check that the result of a pointwise operation
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,11 +351,11 @@ def test_setitem_preserves_views(self, data):

def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
# https://github.com/pandas-dev/pandas/issues/32395
df = expected = pd.DataFrame({"data": pd.Series(data)})
df = expected = pd.DataFrame({0: pd.Series(data)})
result = pd.DataFrame(index=df.index)

key = full_indexer(df)
result.loc[key, "data"] = df["data"]
result.loc[key, 0] = df[0]

tm.assert_frame_equal(result, expected)

Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/extension/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

import pandas as pd
from pandas import Categorical
import pandas._testing as tm
Expand Down Expand Up @@ -100,7 +102,9 @@ def test_contains(self, data, data_missing):
if na_value_obj is na_value:
continue
assert na_value_obj not in data
assert na_value_obj in data_missing # this line differs from super method
# this section differs from the super method
if not using_pyarrow_string_dtype():
assert na_value_obj in data_missing

def test_empty(self, dtype):
cls = dtype.construct_array_type()
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def test_series_constructor_scalar_with_index(self, data, dtype):


class TestDtype(BaseNumPyTests, base.BaseDtypeTests):
def test_check_dtype(self, data, request):
def test_check_dtype(self, data, request, using_infer_string):
if data.dtype.numpy_dtype == "object":
request.applymarker(
pytest.mark.xfail(
Expand Down Expand Up @@ -429,7 +429,7 @@ def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
if data.dtype.numpy_dtype != object:
if not isinstance(key, slice) or key != slice(None):
expected = pd.DataFrame({"data": data.to_numpy()})
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected, check_column_type=False)


@skip_nested
Expand Down