Skip to content

Commit b19a093

Browse files
authored
Adjust tests in Indexing folder for string option (#56107)
* BUG: Index.getitem returning wrong result with negative step for arrow
* Update
* Update
* Fix
* Update array.py
* Fix
* Add gh ref
* Update
* Fix string option tests in indexing
* Fix string option tests in indexing
* Fix string option tests in indexing
* Update test_coercion.py
* Update test_coercion.py
1 parent 50d99ca commit b19a093

9 files changed

+149
-61
lines changed

pandas/tests/indexing/multiindex/test_loc.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,7 @@ def test_loc_setitem_single_column_slice():
566566
tm.assert_frame_equal(df, expected)
567567

568568

569-
def test_loc_nan_multiindex():
569+
def test_loc_nan_multiindex(using_infer_string):
570570
# GH 5286
571571
tups = [
572572
("Good Things", "C", np.nan),
@@ -586,8 +586,12 @@ def test_loc_nan_multiindex():
586586
result = df.loc["Good Things"].loc["C"]
587587
expected = DataFrame(
588588
np.ones((1, 4)),
589-
index=Index([np.nan], dtype="object", name="u3"),
590-
columns=Index(["d1", "d2", "d3", "d4"], dtype="object"),
589+
index=Index(
590+
[np.nan],
591+
dtype="object" if not using_infer_string else "string[pyarrow_numpy]",
592+
name="u3",
593+
),
594+
columns=Index(["d1", "d2", "d3", "d4"]),
591595
)
592596
tm.assert_frame_equal(result, expected)
593597

pandas/tests/indexing/test_at.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
CategoricalIndex,
1414
DataFrame,
1515
DatetimeIndex,
16+
Index,
1617
MultiIndex,
1718
Series,
1819
Timestamp,
@@ -70,7 +71,11 @@ def test_at_setitem_item_cache_cleared(self):
7071
df.at[0, "x"] = 4
7172
df.at[0, "cost"] = 789
7273

73-
expected = DataFrame({"x": [4], "cost": 789}, index=[0])
74+
expected = DataFrame(
75+
{"x": [4], "cost": 789},
76+
index=[0],
77+
columns=Index(["x", "cost"], dtype=object),
78+
)
7479
tm.assert_frame_equal(df, expected)
7580

7681
# And in particular, check that the _item_cache has updated correctly.

pandas/tests/indexing/test_categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ def test_slicing_doc_examples(self):
273273
tm.assert_frame_equal(result, expected)
274274

275275
result = df.iloc[2:4, :].dtypes
276-
expected = Series(["category", "int64"], ["cats", "values"])
276+
expected = Series(["category", "int64"], ["cats", "values"], dtype=object)
277277
tm.assert_series_equal(result, expected)
278278

279279
result = df.loc["h":"j", "cats"]

pandas/tests/indexing/test_chaining_and_caching.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,9 @@ def test_detect_chained_assignment_object_dtype(
339339
self, using_array_manager, using_copy_on_write, warn_copy_on_write
340340
):
341341
expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
342-
df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
342+
df = DataFrame(
343+
{"A": Series(["aaa", "bbb", "ccc"], dtype=object), "B": [1, 2, 3]}
344+
)
343345
df_original = df.copy()
344346

345347
if not using_copy_on_write and not warn_copy_on_write:

pandas/tests/indexing/test_coercion.py

+18-9
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
import numpy as np
1010
import pytest
1111

12+
from pandas._config import using_pyarrow_string_dtype
13+
1214
from pandas.compat import (
1315
IS64,
1416
is_platform_windows,
@@ -111,7 +113,7 @@ def _assert_setitem_index_conversion(
111113
"val,exp_dtype", [("x", object), (5, IndexError), (1.1, object)]
112114
)
113115
def test_setitem_index_object(self, val, exp_dtype):
114-
obj = pd.Series([1, 2, 3, 4], index=list("abcd"))
116+
obj = pd.Series([1, 2, 3, 4], index=pd.Index(list("abcd"), dtype=object))
115117
assert obj.index.dtype == object
116118

117119
if exp_dtype is IndexError:
@@ -122,7 +124,7 @@ def test_setitem_index_object(self, val, exp_dtype):
122124
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
123125
temp[5] = 5
124126
else:
125-
exp_index = pd.Index(list("abcd") + [val])
127+
exp_index = pd.Index(list("abcd") + [val], dtype=object)
126128
self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
127129

128130
@pytest.mark.parametrize(
@@ -195,10 +197,10 @@ def _assert_insert_conversion(self, original, value, expected, expected_dtype):
195197
],
196198
)
197199
def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
198-
obj = pd.Index(list("abcd"))
200+
obj = pd.Index(list("abcd"), dtype=object)
199201
assert obj.dtype == object
200202

201-
exp = pd.Index(["a", coerced_val, "b", "c", "d"])
203+
exp = pd.Index(["a", coerced_val, "b", "c", "d"], dtype=object)
202204
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
203205

204206
@pytest.mark.parametrize(
@@ -397,7 +399,7 @@ def _run_test(self, obj, fill_val, klass, exp_dtype):
397399
)
398400
def test_where_object(self, index_or_series, fill_val, exp_dtype):
399401
klass = index_or_series
400-
obj = klass(list("abcd"))
402+
obj = klass(list("abcd"), dtype=object)
401403
assert obj.dtype == object
402404
self._run_test(obj, fill_val, klass, exp_dtype)
403405

@@ -559,10 +561,10 @@ def _assert_fillna_conversion(self, original, value, expected, expected_dtype):
559561
)
560562
def test_fillna_object(self, index_or_series, fill_val, fill_dtype):
561563
klass = index_or_series
562-
obj = klass(["a", np.nan, "c", "d"])
564+
obj = klass(["a", np.nan, "c", "d"], dtype=object)
563565
assert obj.dtype == object
564566

565-
exp = klass(["a", fill_val, "c", "d"])
567+
exp = klass(["a", fill_val, "c", "d"], dtype=object)
566568
self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
567569

568570
@pytest.mark.parametrize(
@@ -824,6 +826,8 @@ def replacer(self, how, from_key, to_key):
824826
raise ValueError
825827
return replacer
826828

829+
# Expected needs adjustment for the infer string option, seems to work as expected
830+
@pytest.mark.skipif(using_pyarrow_string_dtype(), reason="TODO: test is to complex")
827831
def test_replace_series(self, how, to_key, from_key, replacer):
828832
index = pd.Index([3, 4], name="xxx")
829833
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
@@ -870,13 +874,18 @@ def test_replace_series(self, how, to_key, from_key, replacer):
870874
@pytest.mark.parametrize(
871875
"from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True
872876
)
873-
def test_replace_series_datetime_tz(self, how, to_key, from_key, replacer):
877+
def test_replace_series_datetime_tz(
878+
self, how, to_key, from_key, replacer, using_infer_string
879+
):
874880
index = pd.Index([3, 4], name="xyz")
875881
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
876882
assert obj.dtype == from_key
877883

878884
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
879-
assert exp.dtype == to_key
885+
if using_infer_string and to_key == "object":
886+
assert exp.dtype == "string"
887+
else:
888+
assert exp.dtype == to_key
880889

881890
msg = "Downcasting behavior in `replace`"
882891
warn = FutureWarning if exp.dtype != object else None

pandas/tests/indexing/test_iloc.py

+17-6
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,8 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
100100
# we retain the object dtype.
101101
frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)})
102102
df = frame.copy()
103-
orig_vals = df.values
104103
indexer(df)[key, 0] = cat
105-
expected = DataFrame({0: cat.astype(object), 1: range(3)})
104+
expected = DataFrame({0: Series(cat.astype(object), dtype=object), 1: range(3)})
106105
tm.assert_frame_equal(df, expected)
107106

108107
@pytest.mark.parametrize("box", [array, Series])
@@ -232,7 +231,10 @@ def test_iloc_exceeds_bounds(self):
232231
dfl = DataFrame(
233232
np.random.default_rng(2).standard_normal((5, 2)), columns=list("AB")
234233
)
235-
tm.assert_frame_equal(dfl.iloc[:, 2:3], DataFrame(index=dfl.index, columns=[]))
234+
tm.assert_frame_equal(
235+
dfl.iloc[:, 2:3],
236+
DataFrame(index=dfl.index, columns=Index([], dtype=dfl.columns.dtype)),
237+
)
236238
tm.assert_frame_equal(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
237239
tm.assert_frame_equal(dfl.iloc[4:6], dfl.iloc[[4]])
238240

@@ -451,12 +453,16 @@ def test_iloc_setitem(self):
451453
def test_iloc_setitem_axis_argument(self):
452454
# GH45032
453455
df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
456+
df[1] = df[1].astype(object)
454457
expected = DataFrame([[6, "c", 10], [7, "d", 11], [5, 5, 5]])
458+
expected[1] = expected[1].astype(object)
455459
df.iloc(axis=0)[2] = 5
456460
tm.assert_frame_equal(df, expected)
457461

458462
df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
463+
df[1] = df[1].astype(object)
459464
expected = DataFrame([[6, "c", 5], [7, "d", 5], [8, "e", 5]])
465+
expected[1] = expected[1].astype(object)
460466
df.iloc(axis=1)[2] = 5
461467
tm.assert_frame_equal(df, expected)
462468

@@ -615,7 +621,7 @@ def test_iloc_getitem_labelled_frame(self):
615621
assert result == exp
616622

617623
# out-of-bounds exception
618-
msg = "index 5 is out of bounds for axis 0 with size 4"
624+
msg = "index 5 is out of bounds for axis 0 with size 4|index out of bounds"
619625
with pytest.raises(IndexError, match=msg):
620626
df.iloc[10, 5]
621627

@@ -1313,7 +1319,9 @@ def test_iloc_setitem_dtypes_duplicate_columns(
13131319
self, dtypes, init_value, expected_value
13141320
):
13151321
# GH#22035
1316-
df = DataFrame([[init_value, "str", "str2"]], columns=["a", "b", "b"])
1322+
df = DataFrame(
1323+
[[init_value, "str", "str2"]], columns=["a", "b", "b"], dtype=object
1324+
)
13171325

13181326
# with the enforcement of GH#45333 in 2.0, this sets values inplace,
13191327
# so we retain object dtype
@@ -1360,7 +1368,10 @@ def test_frame_iloc_getitem_callable(self):
13601368

13611369
def test_frame_iloc_setitem_callable(self):
13621370
# GH#11485
1363-
df = DataFrame({"X": [1, 2, 3, 4], "Y": list("aabb")}, index=list("ABCD"))
1371+
df = DataFrame(
1372+
{"X": [1, 2, 3, 4], "Y": Series(list("aabb"), dtype=object)},
1373+
index=list("ABCD"),
1374+
)
13641375

13651376
# return location
13661377
res = df.copy()

pandas/tests/indexing/test_indexing.py

+33-15
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
import numpy as np
99
import pytest
1010

11+
from pandas._config import using_pyarrow_string_dtype
12+
1113
from pandas.errors import IndexingError
1214

1315
from pandas.core.dtypes.common import (
@@ -189,7 +191,7 @@ def test_setitem_dtype_upcast(self):
189191
):
190192
df.loc[0, "c"] = "foo"
191193
expected = DataFrame(
192-
[{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}]
194+
{"a": [1, 3], "b": [np.nan, 2], "c": Series(["foo", np.nan], dtype=object)}
193195
)
194196
tm.assert_frame_equal(df, expected)
195197

@@ -284,18 +286,27 @@ def test_dups_fancy_indexing_not_in_order(self):
284286
with pytest.raises(KeyError, match="not in index"):
285287
df.loc[rows]
286288

287-
def test_dups_fancy_indexing_only_missing_label(self):
289+
def test_dups_fancy_indexing_only_missing_label(self, using_infer_string):
288290
# List containing only missing label
289291
dfnu = DataFrame(
290292
np.random.default_rng(2).standard_normal((5, 3)), index=list("AABCD")
291293
)
292-
with pytest.raises(
293-
KeyError,
294-
match=re.escape(
295-
"\"None of [Index(['E'], dtype='object')] are in the [index]\""
296-
),
297-
):
298-
dfnu.loc[["E"]]
294+
if using_infer_string:
295+
with pytest.raises(
296+
KeyError,
297+
match=re.escape(
298+
"\"None of [Index(['E'], dtype='string')] are in the [index]\""
299+
),
300+
):
301+
dfnu.loc[["E"]]
302+
else:
303+
with pytest.raises(
304+
KeyError,
305+
match=re.escape(
306+
"\"None of [Index(['E'], dtype='object')] are in the [index]\""
307+
),
308+
):
309+
dfnu.loc[["E"]]
299310

300311
@pytest.mark.parametrize("vals", [[0, 1, 2], list("abc")])
301312
def test_dups_fancy_indexing_missing_label(self, vals):
@@ -451,6 +462,9 @@ def test_set_index_nan(self):
451462
)
452463
tm.assert_frame_equal(result, df)
453464

465+
@pytest.mark.xfail(
466+
using_pyarrow_string_dtype(), reason="can't multiply arrow strings"
467+
)
454468
def test_multi_assign(self):
455469
# GH 3626, an assignment of a sub-df to a df
456470
# set float64 to avoid upcast when setting nan
@@ -553,7 +567,7 @@ def test_string_slice_empty(self):
553567
with pytest.raises(KeyError, match="^0$"):
554568
df.loc["2011", 0]
555569

556-
def test_astype_assignment(self):
570+
def test_astype_assignment(self, using_infer_string):
557571
# GH4312 (iloc)
558572
df_orig = DataFrame(
559573
[["1", "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
@@ -567,8 +581,9 @@ def test_astype_assignment(self):
567581
expected = DataFrame(
568582
[[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
569583
)
570-
expected["A"] = expected["A"].astype(object)
571-
expected["B"] = expected["B"].astype(object)
584+
if not using_infer_string:
585+
expected["A"] = expected["A"].astype(object)
586+
expected["B"] = expected["B"].astype(object)
572587
tm.assert_frame_equal(df, expected)
573588

574589
# GH5702 (loc)
@@ -577,16 +592,18 @@ def test_astype_assignment(self):
577592
expected = DataFrame(
578593
[[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
579594
)
580-
expected["A"] = expected["A"].astype(object)
595+
if not using_infer_string:
596+
expected["A"] = expected["A"].astype(object)
581597
tm.assert_frame_equal(df, expected)
582598

583599
df = df_orig.copy()
584600
df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64)
585601
expected = DataFrame(
586602
[["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
587603
)
588-
expected["B"] = expected["B"].astype(object)
589-
expected["C"] = expected["C"].astype(object)
604+
if not using_infer_string:
605+
expected["B"] = expected["B"].astype(object)
606+
expected["C"] = expected["C"].astype(object)
590607
tm.assert_frame_equal(df, expected)
591608

592609
def test_astype_assignment_full_replacements(self):
@@ -673,6 +690,7 @@ def test_loc_setitem_fullindex_views(self):
673690
df.loc[df.index] = df.loc[df.index]
674691
tm.assert_frame_equal(df, df2)
675692

693+
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="can't set int into string")
676694
def test_rhs_alignment(self):
677695
# GH8258, tests that both rows & columns are aligned to what is
678696
# assigned to. covers both uniform data-type & multi-type cases

0 commit comments

Comments
 (0)