Skip to content

Adjust tests in Indexing folder for string option #56107

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions pandas/tests/indexing/multiindex/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@ def test_loc_setitem_single_column_slice():
tm.assert_frame_equal(df, expected)


def test_loc_nan_multiindex():
def test_loc_nan_multiindex(using_infer_string):
# GH 5286
tups = [
("Good Things", "C", np.nan),
Expand All @@ -586,8 +586,12 @@ def test_loc_nan_multiindex():
result = df.loc["Good Things"].loc["C"]
expected = DataFrame(
np.ones((1, 4)),
index=Index([np.nan], dtype="object", name="u3"),
columns=Index(["d1", "d2", "d3", "d4"], dtype="object"),
index=Index(
[np.nan],
dtype="object" if not using_infer_string else "string[pyarrow_numpy]",
name="u3",
),
columns=Index(["d1", "d2", "d3", "d4"]),
)
tm.assert_frame_equal(result, expected)

Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/indexing/test_at.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
CategoricalIndex,
DataFrame,
DatetimeIndex,
Index,
MultiIndex,
Series,
Timestamp,
Expand Down Expand Up @@ -70,7 +71,11 @@ def test_at_setitem_item_cache_cleared(self):
df.at[0, "x"] = 4
df.at[0, "cost"] = 789

expected = DataFrame({"x": [4], "cost": 789}, index=[0])
expected = DataFrame(
{"x": [4], "cost": 789},
index=[0],
columns=Index(["x", "cost"], dtype=object),
)
tm.assert_frame_equal(df, expected)

# And in particular, check that the _item_cache has updated correctly.
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def test_slicing_doc_examples(self):
tm.assert_frame_equal(result, expected)

result = df.iloc[2:4, :].dtypes
expected = Series(["category", "int64"], ["cats", "values"])
expected = Series(["category", "int64"], ["cats", "values"], dtype=object)
tm.assert_series_equal(result, expected)

result = df.loc["h":"j", "cats"]
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/indexing/test_chaining_and_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,9 @@ def test_detect_chained_assignment_object_dtype(
self, using_array_manager, using_copy_on_write, warn_copy_on_write
):
expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
df = DataFrame(
{"A": Series(["aaa", "bbb", "ccc"], dtype=object), "B": [1, 2, 3]}
)
df_original = df.copy()

if not using_copy_on_write and not warn_copy_on_write:
Expand Down
27 changes: 18 additions & 9 deletions pandas/tests/indexing/test_coercion.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

from pandas.compat import (
IS64,
is_platform_windows,
Expand Down Expand Up @@ -111,7 +113,7 @@ def _assert_setitem_index_conversion(
"val,exp_dtype", [("x", object), (5, IndexError), (1.1, object)]
)
def test_setitem_index_object(self, val, exp_dtype):
obj = pd.Series([1, 2, 3, 4], index=list("abcd"))
obj = pd.Series([1, 2, 3, 4], index=pd.Index(list("abcd"), dtype=object))
assert obj.index.dtype == object

if exp_dtype is IndexError:
Expand All @@ -122,7 +124,7 @@ def test_setitem_index_object(self, val, exp_dtype):
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
temp[5] = 5
else:
exp_index = pd.Index(list("abcd") + [val])
exp_index = pd.Index(list("abcd") + [val], dtype=object)
self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)

@pytest.mark.parametrize(
Expand Down Expand Up @@ -195,10 +197,10 @@ def _assert_insert_conversion(self, original, value, expected, expected_dtype):
],
)
def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
obj = pd.Index(list("abcd"))
obj = pd.Index(list("abcd"), dtype=object)
assert obj.dtype == object

exp = pd.Index(["a", coerced_val, "b", "c", "d"])
exp = pd.Index(["a", coerced_val, "b", "c", "d"], dtype=object)
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)

@pytest.mark.parametrize(
Expand Down Expand Up @@ -397,7 +399,7 @@ def _run_test(self, obj, fill_val, klass, exp_dtype):
)
def test_where_object(self, index_or_series, fill_val, exp_dtype):
klass = index_or_series
obj = klass(list("abcd"))
obj = klass(list("abcd"), dtype=object)
assert obj.dtype == object
self._run_test(obj, fill_val, klass, exp_dtype)

Expand Down Expand Up @@ -559,10 +561,10 @@ def _assert_fillna_conversion(self, original, value, expected, expected_dtype):
)
def test_fillna_object(self, index_or_series, fill_val, fill_dtype):
klass = index_or_series
obj = klass(["a", np.nan, "c", "d"])
obj = klass(["a", np.nan, "c", "d"], dtype=object)
assert obj.dtype == object

exp = klass(["a", fill_val, "c", "d"])
exp = klass(["a", fill_val, "c", "d"], dtype=object)
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)

@pytest.mark.parametrize(
Expand Down Expand Up @@ -824,6 +826,8 @@ def replacer(self, how, from_key, to_key):
raise ValueError
return replacer

# Expected needs adjustment for the infer string option; seems to work as expected
@pytest.mark.skipif(using_pyarrow_string_dtype(), reason="TODO: test is to complex")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was the failure here due to a bug, or is this something that is expected to work? It would be good to mention that here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think everything works as expected, but the parametrisation is so complex that I couldn't figure out how to adjust the expected values efficiently. That's why I am skipping the test for now; I added a comment.

def test_replace_series(self, how, to_key, from_key, replacer):
index = pd.Index([3, 4], name="xxx")
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
Expand Down Expand Up @@ -870,13 +874,18 @@ def test_replace_series(self, how, to_key, from_key, replacer):
@pytest.mark.parametrize(
"from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True
)
def test_replace_series_datetime_tz(self, how, to_key, from_key, replacer):
def test_replace_series_datetime_tz(
self, how, to_key, from_key, replacer, using_infer_string
):
index = pd.Index([3, 4], name="xyz")
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
assert obj.dtype == from_key

exp = pd.Series(self.rep[to_key], index=index, name="yyy")
assert exp.dtype == to_key
if using_infer_string and to_key == "object":
assert exp.dtype == "string"
else:
assert exp.dtype == to_key

msg = "Downcasting behavior in `replace`"
warn = FutureWarning if exp.dtype != object else None
Expand Down
23 changes: 17 additions & 6 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,8 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
# we retain the object dtype.
frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)})
df = frame.copy()
orig_vals = df.values
indexer(df)[key, 0] = cat
expected = DataFrame({0: cat.astype(object), 1: range(3)})
expected = DataFrame({0: Series(cat.astype(object), dtype=object), 1: range(3)})
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("box", [array, Series])
Expand Down Expand Up @@ -232,7 +231,10 @@ def test_iloc_exceeds_bounds(self):
dfl = DataFrame(
np.random.default_rng(2).standard_normal((5, 2)), columns=list("AB")
)
tm.assert_frame_equal(dfl.iloc[:, 2:3], DataFrame(index=dfl.index, columns=[]))
tm.assert_frame_equal(
dfl.iloc[:, 2:3],
DataFrame(index=dfl.index, columns=Index([], dtype=dfl.columns.dtype)),
)
tm.assert_frame_equal(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
tm.assert_frame_equal(dfl.iloc[4:6], dfl.iloc[[4]])

Expand Down Expand Up @@ -451,12 +453,16 @@ def test_iloc_setitem(self):
def test_iloc_setitem_axis_argument(self):
# GH45032
df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
df[1] = df[1].astype(object)
expected = DataFrame([[6, "c", 10], [7, "d", 11], [5, 5, 5]])
expected[1] = expected[1].astype(object)
df.iloc(axis=0)[2] = 5
tm.assert_frame_equal(df, expected)

df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
df[1] = df[1].astype(object)
expected = DataFrame([[6, "c", 5], [7, "d", 5], [8, "e", 5]])
expected[1] = expected[1].astype(object)
df.iloc(axis=1)[2] = 5
tm.assert_frame_equal(df, expected)

Expand Down Expand Up @@ -615,7 +621,7 @@ def test_iloc_getitem_labelled_frame(self):
assert result == exp

# out-of-bounds exception
msg = "index 5 is out of bounds for axis 0 with size 4"
msg = "index 5 is out of bounds for axis 0 with size 4|index out of bounds"
with pytest.raises(IndexError, match=msg):
df.iloc[10, 5]

Expand Down Expand Up @@ -1313,7 +1319,9 @@ def test_iloc_setitem_dtypes_duplicate_columns(
self, dtypes, init_value, expected_value
):
# GH#22035
df = DataFrame([[init_value, "str", "str2"]], columns=["a", "b", "b"])
df = DataFrame(
[[init_value, "str", "str2"]], columns=["a", "b", "b"], dtype=object
)

# with the enforcement of GH#45333 in 2.0, this sets values inplace,
# so we retain object dtype
Expand Down Expand Up @@ -1360,7 +1368,10 @@ def test_frame_iloc_getitem_callable(self):

def test_frame_iloc_setitem_callable(self):
# GH#11485
df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))
df = DataFrame(
{"X": [1, 2, 3, 4], "Y": Series(list("aabb"), dtype=object)},
index=list("ABCD"),
)

# return location
res = df.copy()
Expand Down
48 changes: 33 additions & 15 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

from pandas.errors import IndexingError

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -189,7 +191,7 @@ def test_setitem_dtype_upcast(self):
):
df.loc[0, "c"] = "foo"
expected = DataFrame(
[{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]
{"a": [1, 3], "b": [np.nan, 2], "c": Series(["foo", np.nan], dtype=object)}
)
tm.assert_frame_equal(df, expected)

Expand Down Expand Up @@ -284,18 +286,27 @@ def test_dups_fancy_indexing_not_in_order(self):
with pytest.raises(KeyError, match="not in index"):
df.loc[rows]

def test_dups_fancy_indexing_only_missing_label(self):
def test_dups_fancy_indexing_only_missing_label(self, using_infer_string):
# List containing only missing label
dfnu = DataFrame(
np.random.default_rng(2).standard_normal((5, 3)), index=list("AABCD")
)
with pytest.raises(
KeyError,
match=re.escape(
"\"None of [Index(['E'], dtype='object')] are in the [index]\""
),
):
dfnu.loc[["E"]]
if using_infer_string:
with pytest.raises(
KeyError,
match=re.escape(
"\"None of [Index(['E'], dtype='string')] are in the [index]\""
),
):
dfnu.loc[["E"]]
else:
with pytest.raises(
KeyError,
match=re.escape(
"\"None of [Index(['E'], dtype='object')] are in the [index]\""
),
):
dfnu.loc[["E"]]

@pytest.mark.parametrize("vals", [[0, 1, 2], list("abc")])
def test_dups_fancy_indexing_missing_label(self, vals):
Expand Down Expand Up @@ -451,6 +462,9 @@ def test_set_index_nan(self):
)
tm.assert_frame_equal(result, df)

@pytest.mark.xfail(
using_pyarrow_string_dtype(), reason="can't multiply arrow strings"
)
def test_multi_assign(self):
# GH 3626, an assignment of a sub-df to a df
# set float64 to avoid upcast when setting nan
Expand Down Expand Up @@ -553,7 +567,7 @@ def test_string_slice_empty(self):
with pytest.raises(KeyError, match="^0$"):
df.loc["2011", 0]

def test_astype_assignment(self):
def test_astype_assignment(self, using_infer_string):
# GH4312 (iloc)
df_orig = DataFrame(
[["1", "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
Expand All @@ -567,8 +581,9 @@ def test_astype_assignment(self):
expected = DataFrame(
[[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
)
expected["A"] = expected["A"].astype(object)
expected["B"] = expected["B"].astype(object)
if not using_infer_string:
expected["A"] = expected["A"].astype(object)
expected["B"] = expected["B"].astype(object)
tm.assert_frame_equal(df, expected)

# GH5702 (loc)
Expand All @@ -577,16 +592,18 @@ def test_astype_assignment(self):
expected = DataFrame(
[[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
)
expected["A"] = expected["A"].astype(object)
if not using_infer_string:
expected["A"] = expected["A"].astype(object)
tm.assert_frame_equal(df, expected)

df = df_orig.copy()
df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64)
expected = DataFrame(
[["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
)
expected["B"] = expected["B"].astype(object)
expected["C"] = expected["C"].astype(object)
if not using_infer_string:
expected["B"] = expected["B"].astype(object)
expected["C"] = expected["C"].astype(object)
tm.assert_frame_equal(df, expected)

def test_astype_assignment_full_replacements(self):
Expand Down Expand Up @@ -673,6 +690,7 @@ def test_loc_setitem_fullindex_views(self):
df.loc[df.index] = df.loc[df.index]
tm.assert_frame_equal(df, df2)

@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't set int into string")
def test_rhs_alignment(self):
# GH8258, tests that both rows & columns are aligned to what is
# assigned to. covers both uniform data-type & multi-type cases
Expand Down
Loading