Skip to content

Commit 91ddc8b

Browse files
authored
Adjust Index specific tests for string option (#56074)
* BUG: setitem casting object Index to arrow strings
* Fix
* Start fixing index tests
* BUG: Index.isin raising for arrow strings and null set
* Fix more tests
* TST: Fix shares_memory for arrow string dtype
* TST: Fix shares_memory for arrow string dtype
* TST: Fix shares_memory for arrow string dtype
* Fix more tests
* BUG: Index.getitem returning wrong result with negative step for arrow
* Update
* Update
* Fix
* Update array.py
* Fix
* Move
* Move
* Fix
* Add gh ref
* Update v2.1.4.rst
* Finish
* Update
* Update test_base.py
* Update test_old_base.py
* Update conftest.py
* Update conftest.py
* Update test_old_base.py
* Update
* Update test_setops.py
* Fix pre-commit
1 parent fb05cc7 commit 91ddc8b

21 files changed

+173
-76
lines changed

pandas/conftest.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1903,7 +1903,7 @@ def using_copy_on_write() -> bool:
19031903
@pytest.fixture
19041904
def warn_copy_on_write() -> bool:
19051905
"""
1906-
Fixture to check if Copy-on-Write is enabled.
1906+
Fixture to check if Copy-on-Write is in warning mode.
19071907
"""
19081908
return (
19091909
pd.options.mode.copy_on_write == "warn"
@@ -1914,9 +1914,9 @@ def warn_copy_on_write() -> bool:
19141914
@pytest.fixture
19151915
def using_infer_string() -> bool:
19161916
"""
1917-
Fixture to check if infer_string is enabled.
1917+
Fixture to check if infer string option is enabled.
19181918
"""
1919-
return pd.options.future.infer_string
1919+
return pd.options.future.infer_string is True
19201920

19211921

19221922
warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]

pandas/tests/indexes/base_class/test_formats.py

+3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_pyarrow_string_dtype
45
import pandas._config.config as cf
56

67
from pandas import Index
@@ -15,6 +16,7 @@ def test_repr_is_valid_construction_code(self):
1516
res = eval(repr(idx))
1617
tm.assert_index_equal(res, idx)
1718

19+
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
1820
@pytest.mark.parametrize(
1921
"index,expected",
2022
[
@@ -79,6 +81,7 @@ def test_string_index_repr(self, index, expected):
7981
result = repr(index)
8082
assert result == expected
8183

84+
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
8285
@pytest.mark.parametrize(
8386
"index,expected",
8487
[

pandas/tests/indexes/base_class/test_reshape.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,15 @@ def test_insert(self):
3333

3434
# test empty
3535
null_index = Index([])
36-
tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a"))
36+
tm.assert_index_equal(Index(["a"], dtype=object), null_index.insert(0, "a"))
3737

38-
def test_insert_missing(self, nulls_fixture):
38+
def test_insert_missing(self, nulls_fixture, using_infer_string):
3939
# GH#22295
4040
# test there is no mangling of NA values
41-
expected = Index(["a", nulls_fixture, "b", "c"])
42-
result = Index(list("abc")).insert(1, nulls_fixture)
41+
expected = Index(["a", nulls_fixture, "b", "c"], dtype=object)
42+
result = Index(list("abc"), dtype=object).insert(
43+
1, Index([nulls_fixture], dtype=object)
44+
)
4345
tm.assert_index_equal(result, expected)
4446

4547
@pytest.mark.parametrize(

pandas/tests/indexes/base_class/test_setops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def test_intersection_str_dates(self, sort):
154154
def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
155155
# non-monotonic non-unique
156156
index1 = Index(["A", "B", "A", "C"])
157-
expected = Index(expected_arr, dtype="object")
157+
expected = Index(expected_arr)
158158
result = index1.intersection(index2, sort=sort)
159159
if sort is None:
160160
expected = expected.sort_values()

pandas/tests/indexes/categorical/test_astype.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def test_astype(self):
1818
ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
1919

2020
result = ci.astype(object)
21-
tm.assert_index_equal(result, Index(np.array(ci)))
21+
tm.assert_index_equal(result, Index(np.array(ci), dtype=object))
2222

2323
# this IS equal, but not the same class
2424
assert result.equals(ci)

pandas/tests/indexes/categorical/test_category.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_pyarrow_string_dtype
5+
46
from pandas._libs import index as libindex
57
from pandas._libs.arrays import NDArrayBacked
68

@@ -47,7 +49,7 @@ def test_insert(self, simple_index):
4749

4850
# invalid -> cast to object
4951
expected = ci.astype(object).insert(0, "d")
50-
result = ci.insert(0, "d")
52+
result = ci.insert(0, "d").astype(object)
5153
tm.assert_index_equal(result, expected, exact=True)
5254

5355
# GH 18295 (test missing)
@@ -194,6 +196,7 @@ def test_unique(self, data, categories, expected_data, ordered):
194196
expected = CategoricalIndex(expected_data, dtype=dtype)
195197
tm.assert_index_equal(idx.unique(), expected)
196198

199+
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr doesn't roundtrip")
197200
def test_repr_roundtrip(self):
198201
ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
199202
str(ci)

pandas/tests/indexes/categorical/test_formats.py

+4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
"""
22
Tests for CategoricalIndex.__repr__ and related methods.
33
"""
4+
import pytest
5+
6+
from pandas._config import using_pyarrow_string_dtype
47
import pandas._config.config as cf
58

69
from pandas import CategoricalIndex
@@ -16,6 +19,7 @@ def test_format_different_scalar_lengths(self):
1619
with tm.assert_produces_warning(FutureWarning, match=msg):
1720
assert idx.format() == expected
1821

22+
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
1923
def test_string_categorical_index_repr(self):
2024
# short
2125
idx = CategoricalIndex(["a", "bb", "ccc"])

pandas/tests/indexes/categorical/test_reindex.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def test_reindex_duplicate_target(self):
4040
# See GH25459
4141
cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
4242
res, indexer = cat.reindex(["a", "c", "c"])
43-
exp = Index(["a", "c", "c"], dtype="object")
43+
exp = Index(["a", "c", "c"])
4444
tm.assert_index_equal(res, exp, exact=True)
4545
tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
4646

pandas/tests/indexes/datetimes/methods/test_map.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def test_map(self):
1616

1717
f = lambda x: x.strftime("%Y%m%d")
1818
result = rng.map(f)
19-
exp = Index([f(x) for x in rng], dtype="<U8")
19+
exp = Index([f(x) for x in rng])
2020
tm.assert_index_equal(result, exp)
2121

2222
def test_map_fallthrough(self, capsys):

pandas/tests/indexes/interval/test_formats.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_pyarrow_string_dtype
5+
46
from pandas import (
57
DataFrame,
68
DatetimeIndex,
@@ -31,13 +33,16 @@ class TestIntervalIndexRendering:
3133
(DataFrame, (" 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c")),
3234
],
3335
)
34-
def test_repr_missing(self, constructor, expected):
36+
def test_repr_missing(self, constructor, expected, using_infer_string, request):
3537
# GH 25984
38+
if using_infer_string and constructor is Series:
39+
request.applymarker(pytest.mark.xfail(reason="repr different"))
3640
index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
3741
obj = constructor(list("abc"), index=index)
3842
result = repr(obj)
3943
assert result == expected
4044

45+
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
4146
def test_repr_floats(self):
4247
# GH 32553
4348

pandas/tests/indexes/multi/test_constructors.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -847,11 +847,14 @@ def test_multiindex_inference_consistency():
847847
assert lev.dtype == object
848848

849849

850-
def test_dtype_representation():
850+
def test_dtype_representation(using_infer_string):
851851
# GH#46900
852852
pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")])
853853
result = pmidx.dtypes
854+
exp = "object" if not using_infer_string else "string"
854855
expected = Series(
855-
["int64", "object"], index=MultiIndex.from_tuples([("a", "b"), ("c", "d")])
856+
["int64", exp],
857+
index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]),
858+
dtype=object,
856859
)
857860
tm.assert_series_equal(result, expected)

pandas/tests/indexes/multi/test_get_set.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -34,23 +34,25 @@ def test_get_level_number_integer(idx):
3434
idx._get_level_number("fourth")
3535

3636

37-
def test_get_dtypes():
37+
def test_get_dtypes(using_infer_string):
3838
# Test MultiIndex.dtypes (# Gh37062)
3939
idx_multitype = MultiIndex.from_product(
4040
[[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")],
4141
names=["int", "string", "dt"],
4242
)
43+
44+
exp = "object" if not using_infer_string else "string"
4345
expected = pd.Series(
4446
{
4547
"int": np.dtype("int64"),
46-
"string": np.dtype("O"),
48+
"string": exp,
4749
"dt": DatetimeTZDtype(tz="utc"),
4850
}
4951
)
5052
tm.assert_series_equal(expected, idx_multitype.dtypes)
5153

5254

53-
def test_get_dtypes_no_level_name():
55+
def test_get_dtypes_no_level_name(using_infer_string):
5456
# Test MultiIndex.dtypes (# GH38580 )
5557
idx_multitype = MultiIndex.from_product(
5658
[
@@ -59,17 +61,18 @@ def test_get_dtypes_no_level_name():
5961
pd.date_range("20200101", periods=2, tz="UTC"),
6062
],
6163
)
64+
exp = "object" if not using_infer_string else "string"
6265
expected = pd.Series(
6366
{
6467
"level_0": np.dtype("int64"),
65-
"level_1": np.dtype("O"),
68+
"level_1": exp,
6669
"level_2": DatetimeTZDtype(tz="utc"),
6770
}
6871
)
6972
tm.assert_series_equal(expected, idx_multitype.dtypes)
7073

7174

72-
def test_get_dtypes_duplicate_level_names():
75+
def test_get_dtypes_duplicate_level_names(using_infer_string):
7376
# Test MultiIndex.dtypes with non-unique level names (# GH45174)
7477
result = MultiIndex.from_product(
7578
[
@@ -79,8 +82,9 @@ def test_get_dtypes_duplicate_level_names():
7982
],
8083
names=["A", "A", "A"],
8184
).dtypes
85+
exp = "object" if not using_infer_string else "string"
8286
expected = pd.Series(
83-
[np.dtype("int64"), np.dtype("O"), DatetimeTZDtype(tz="utc")],
87+
[np.dtype("int64"), exp, DatetimeTZDtype(tz="utc")],
8488
index=["A", "A", "A"],
8589
)
8690
tm.assert_series_equal(result, expected)

pandas/tests/indexes/multi/test_reindex.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,14 @@ def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
7575
assert idx.reindex([], level=1)[0].names == ["foo", "bar"]
7676

7777

78-
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array():
78+
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(
79+
using_infer_string,
80+
):
7981
# GH7774
8082
idx = MultiIndex.from_product([[0, 1], ["a", "b"]])
8183
assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64
82-
assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_
84+
exp = np.object_ if not using_infer_string else str
85+
assert idx.reindex([], level=1)[0].levels[1].dtype.type == exp
8386

8487
# case with EA levels
8588
cat = pd.Categorical(["foo", "bar"])

pandas/tests/indexes/multi/test_setops.py

+17-8
Original file line numberDiff line numberDiff line change
@@ -263,19 +263,23 @@ def test_union(idx, sort):
263263
assert result.equals(idx)
264264

265265

266-
def test_union_with_regular_index(idx):
266+
def test_union_with_regular_index(idx, using_infer_string):
267267
other = Index(["A", "B", "C"])
268268

269269
result = other.union(idx)
270270
assert ("foo", "one") in result
271271
assert "B" in result
272272

273-
msg = "The values in the array are unorderable"
274-
with tm.assert_produces_warning(RuntimeWarning, match=msg):
275-
result2 = idx.union(other)
276-
# This is more consistent now, if sorting fails then we don't sort at all
277-
# in the MultiIndex case.
278-
assert not result.equals(result2)
273+
if using_infer_string:
274+
with pytest.raises(NotImplementedError, match="Can only union"):
275+
idx.union(other)
276+
else:
277+
msg = "The values in the array are unorderable"
278+
with tm.assert_produces_warning(RuntimeWarning, match=msg):
279+
result2 = idx.union(other)
280+
# This is more consistent now, if sorting fails then we don't sort at all
281+
# in the MultiIndex case.
282+
assert not result.equals(result2)
279283

280284

281285
def test_intersection(idx, sort):
@@ -756,7 +760,12 @@ def test_intersection_keep_ea_dtypes(val, any_numeric_ea_dtype):
756760

757761
def test_union_with_na_when_constructing_dataframe():
758762
# GH43222
759-
series1 = Series((1,), index=MultiIndex.from_tuples(((None, None),)))
763+
series1 = Series(
764+
(1,),
765+
index=MultiIndex.from_arrays(
766+
[Series([None], dtype="string"), Series([None], dtype="string")]
767+
),
768+
)
760769
series2 = Series((10, 20), index=MultiIndex.from_tuples(((None, None), ("a", "b"))))
761770
result = DataFrame([series1, series2])
762771
expected = DataFrame({(np.nan, np.nan): [1.0, 10.0], ("a", "b"): [np.nan, 20.0]})

pandas/tests/indexes/object/test_astype.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def test_astype_str_from_bytes():
2020

2121
# while we're here, check that Series.astype behaves the same
2222
result = Series(idx).astype(str)
23-
expected = Series(expected)
23+
expected = Series(expected, dtype=object)
2424
tm.assert_series_equal(result, expected)
2525

2626

0 commit comments

Comments
 (0)