Skip to content

Commit b7dedf5

Browse files
TST (string dtype): Adjust indexing string tests (#59541)
Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent 13f45e7 commit b7dedf5

File tree

6 files changed

+57
-51
lines changed

6 files changed

+57
-51
lines changed

pandas/core/arrays/string_.py

+4
Original file line number | Diff line number | Diff line change
@@ -715,6 +715,10 @@ def __setitem__(self, key, value) -> None:
715715
else:
716716
if not is_array_like(value):
717717
value = np.asarray(value, dtype=object)
718+
else:
719+
# cast categories and friends to arrays to see if values are
720+
# compatible, compatibility with arrow backed strings
721+
value = np.asarray(value)
718722
if len(value) and not lib.is_string_array(value, skipna=True):
719723
raise TypeError("Must provide strings.")
720724

pandas/core/arrays/string_arrow.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,7 @@ def _maybe_convert_setitem_value(self, value):
240240
value[isna(value)] = None
241241
for v in value:
242242
if not (v is None or isinstance(v, str)):
243-
raise TypeError("Scalar must be NA or str")
243+
raise TypeError("Must provide strings")
244244
return super()._maybe_convert_setitem_value(value)
245245

246246
def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:

pandas/tests/arrays/string_/test_string.py

+1 -4
Original file line number | Diff line number | Diff line change
@@ -102,10 +102,7 @@ def test_setitem_validates(cls, dtype):
102102
with pytest.raises(TypeError, match=msg):
103103
arr[0] = 10
104104

105-
if dtype.storage == "python":
106-
msg = "Must provide strings."
107-
else:
108-
msg = "Scalar must be NA or str"
105+
msg = "Must provide strings"
109106
with pytest.raises(TypeError, match=msg):
110107
arr[:] = np.array([1, 2])
111108

pandas/tests/indexing/test_iloc.py

+16 -15
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,6 @@
66
import numpy as np
77
import pytest
88

9-
from pandas._config import using_string_dtype
10-
119
from pandas.errors import IndexingError
1210

1311
from pandas import (
@@ -1198,22 +1196,25 @@ def test_iloc_getitem_int_single_ea_block_view(self):
11981196
arr[2] = arr[-1]
11991197
assert ser[0] == arr[-1]
12001198

1201-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
1202-
def test_iloc_setitem_multicolumn_to_datetime(self):
1199+
def test_iloc_setitem_multicolumn_to_datetime(self, using_infer_string):
12031200
# GH#20511
12041201
df = DataFrame({"A": ["2022-01-01", "2022-01-02"], "B": ["2021", "2022"]})
12051202

1206-
df.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])})
1207-
expected = DataFrame(
1208-
{
1209-
"A": [
1210-
Timestamp("2021-01-01 00:00:00"),
1211-
Timestamp("2022-01-01 00:00:00"),
1212-
],
1213-
"B": ["2021", "2022"],
1214-
}
1215-
)
1216-
tm.assert_frame_equal(df, expected, check_dtype=False)
1203+
if using_infer_string:
1204+
with pytest.raises(TypeError, match="Invalid value"):
1205+
df.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])})
1206+
else:
1207+
df.iloc[:, [0]] = DataFrame({"A": to_datetime(["2021", "2022"])})
1208+
expected = DataFrame(
1209+
{
1210+
"A": [
1211+
Timestamp("2021-01-01 00:00:00"),
1212+
Timestamp("2022-01-01 00:00:00"),
1213+
],
1214+
"B": ["2021", "2022"],
1215+
}
1216+
)
1217+
tm.assert_frame_equal(df, expected, check_dtype=False)
12171218

12181219

12191220
class TestILocErrors:

pandas/tests/indexing/test_indexing.py

+7 -11
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,6 @@
88
import numpy as np
99
import pytest
1010

11-
from pandas._config import using_string_dtype
12-
1311
from pandas.errors import IndexingError
1412

1513
from pandas.core.dtypes.common import (
@@ -528,12 +526,12 @@ def test_string_slice_empty(self):
528526
with pytest.raises(KeyError, match="^0$"):
529527
df.loc["2011", 0]
530528

531-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
532529
def test_astype_assignment(self, using_infer_string):
533530
# GH4312 (iloc)
534531
df_orig = DataFrame(
535532
[["1", "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
536533
)
534+
df_orig[list("ABCDG")] = df_orig[list("ABCDG")].astype(object)
537535

538536
df = df_orig.copy()
539537

@@ -543,9 +541,9 @@ def test_astype_assignment(self, using_infer_string):
543541
expected = DataFrame(
544542
[[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
545543
)
546-
if not using_infer_string:
547-
expected["A"] = expected["A"].astype(object)
548-
expected["B"] = expected["B"].astype(object)
544+
expected[list("CDG")] = expected[list("CDG")].astype(object)
545+
expected["A"] = expected["A"].astype(object)
546+
expected["B"] = expected["B"].astype(object)
549547
tm.assert_frame_equal(df, expected)
550548

551549
# GH5702 (loc)
@@ -554,18 +552,16 @@ def test_astype_assignment(self, using_infer_string):
554552
expected = DataFrame(
555553
[[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
556554
)
557-
if not using_infer_string:
558-
expected["A"] = expected["A"].astype(object)
555+
expected[list("ABCDG")] = expected[list("ABCDG")].astype(object)
559556
tm.assert_frame_equal(df, expected)
560557

561558
df = df_orig.copy()
559+
562560
df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype(np.int64)
563561
expected = DataFrame(
564562
[["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG")
565563
)
566-
if not using_infer_string:
567-
expected["B"] = expected["B"].astype(object)
568-
expected["C"] = expected["C"].astype(object)
564+
expected[list("ABCDG")] = expected[list("ABCDG")].astype(object)
569565
tm.assert_frame_equal(df, expected)
570566

571567
def test_astype_assignment_full_replacements(self):

pandas/tests/indexing/test_loc.py

+28 -20
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""test label based indexing with loc"""
22

33
from collections import namedtuple
4+
import contextlib
45
from datetime import (
56
date,
67
datetime,
@@ -13,10 +14,7 @@
1314
import numpy as np
1415
import pytest
1516

16-
from pandas._config import using_string_dtype
17-
1817
from pandas._libs import index as libindex
19-
from pandas.compat import HAS_PYARROW
2018
from pandas.errors import IndexingError
2119

2220
import pandas as pd
@@ -615,8 +613,7 @@ def test_loc_setitem_consistency_empty(self):
615613
expected["x"] = expected["x"].astype(np.int64)
616614
tm.assert_frame_equal(df, expected)
617615

618-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
619-
def test_loc_setitem_consistency_slice_column_len(self):
616+
def test_loc_setitem_consistency_slice_column_len(self, using_infer_string):
620617
# .loc[:,column] setting with slice == len of the column
621618
# GH10408
622619
levels = [
@@ -640,12 +637,23 @@ def test_loc_setitem_consistency_slice_column_len(self):
640637
]
641638
df = DataFrame(values, index=mi, columns=cols)
642639

643-
df.loc[:, ("Respondent", "StartDate")] = to_datetime(
644-
df.loc[:, ("Respondent", "StartDate")]
645-
)
646-
df.loc[:, ("Respondent", "EndDate")] = to_datetime(
647-
df.loc[:, ("Respondent", "EndDate")]
648-
)
640+
ctx = contextlib.nullcontext()
641+
if using_infer_string:
642+
ctx = pytest.raises(TypeError, match="Invalid value")
643+
644+
with ctx:
645+
df.loc[:, ("Respondent", "StartDate")] = to_datetime(
646+
df.loc[:, ("Respondent", "StartDate")]
647+
)
648+
with ctx:
649+
df.loc[:, ("Respondent", "EndDate")] = to_datetime(
650+
df.loc[:, ("Respondent", "EndDate")]
651+
)
652+
653+
if using_infer_string:
654+
# infer-objects won't infer stuff anymore
655+
return
656+
649657
df = df.infer_objects()
650658

651659
# Adding a new key
@@ -1211,20 +1219,23 @@ def test_loc_reverse_assignment(self):
12111219

12121220
tm.assert_series_equal(result, expected)
12131221

1214-
@pytest.mark.xfail(using_string_dtype(), reason="can't set int into string")
1215-
def test_loc_setitem_str_to_small_float_conversion_type(self):
1222+
def test_loc_setitem_str_to_small_float_conversion_type(self, using_infer_string):
12161223
# GH#20388
12171224

12181225
col_data = [str(np.random.default_rng(2).random() * 1e-12) for _ in range(5)]
12191226
result = DataFrame(col_data, columns=["A"])
1220-
expected = DataFrame(col_data, columns=["A"], dtype=object)
1227+
expected = DataFrame(col_data, columns=["A"])
12211228
tm.assert_frame_equal(result, expected)
12221229

12231230
# assigning with loc/iloc attempts to set the values inplace, which
12241231
# in this case is successful
1225-
result.loc[result.index, "A"] = [float(x) for x in col_data]
1226-
expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
1227-
tm.assert_frame_equal(result, expected)
1232+
if using_infer_string:
1233+
with pytest.raises(TypeError, match="Must provide strings"):
1234+
result.loc[result.index, "A"] = [float(x) for x in col_data]
1235+
else:
1236+
result.loc[result.index, "A"] = [float(x) for x in col_data]
1237+
expected = DataFrame(col_data, columns=["A"], dtype=float).astype(object)
1238+
tm.assert_frame_equal(result, expected)
12281239

12291240
# assigning the entire column using __setitem__ swaps in the new array
12301241
# GH#???
@@ -1389,9 +1400,6 @@ def test_loc_setitem_categorical_values_partial_column_slice(self):
13891400
df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
13901401
df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
13911402

1392-
@pytest.mark.xfail(
1393-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
1394-
)
13951403
def test_loc_setitem_single_row_categorical(self, using_infer_string):
13961404
# GH#25495
13971405
df = DataFrame({"Alpha": ["a"], "Numeric": [0]})

0 commit comments

Comments
 (0)