Skip to content

Adjust Index specific tests for string option #56074

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 48 commits into from
Dec 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
6accf2b
BUG: setitem casting object Index to arrow strings
phofl Oct 22, 2023
61f4903
Fix
phofl Oct 22, 2023
2effd1f
Start fixing index tests
phofl Nov 4, 2023
6569663
BUG: Index.isin raising for arrow strings and null set
phofl Nov 4, 2023
382b989
Merge branch '55821' into tests
phofl Nov 4, 2023
cdbdba2
Fix more tests
phofl Nov 4, 2023
57cb8f4
TST: Fix shares_memory for arrow string dtype
phofl Nov 4, 2023
7697059
TST: Fix shares_memory for arrow string dtype
phofl Nov 4, 2023
285f7bb
TST: Fix shares_memory for arrow string dtype
phofl Nov 4, 2023
4afaf69
Merge branch 'tm_shares' into tests
phofl Nov 4, 2023
f59c4bc
Fix more tests
phofl Nov 5, 2023
ff6e271
BUG: Index.getitem returning wrong result with negative step for arrow
phofl Nov 5, 2023
7518250
Merge branch 'main' into indexing_negative_steps
phofl Nov 5, 2023
7684911
Update
phofl Nov 5, 2023
7474cb2
Update
phofl Nov 5, 2023
22767d2
Merge branch 'indexing_negative_steps' into tests
phofl Nov 5, 2023
b27b0f8
Fix
phofl Nov 5, 2023
2f3f6ce
Merge branch 'indexing_negative_steps' into tests
phofl Nov 5, 2023
659577e
Update array.py
phofl Nov 5, 2023
11c0e86
Fix
phofl Nov 15, 2023
30097eb
Merge branch 'main' into indexing_negative_steps
phofl Nov 15, 2023
717c306
Merge branch 'main' into setitem_index_inference_string_dtype
phofl Nov 15, 2023
f55b62f
Move
phofl Nov 16, 2023
ea3fcfb
Move
phofl Nov 16, 2023
5c4bc50
Merge remote-tracking branch 'upstream/main' into indexing_negative_steps
phofl Nov 16, 2023
86fe4f1
Fix
phofl Nov 16, 2023
19b4613
Merge branch 'main' into setitem_index_inference_string_dtype
phofl Nov 17, 2023
2425529
Add gh ref
phofl Nov 17, 2023
84e815e
Update v2.1.4.rst
phofl Nov 17, 2023
0edead5
Merge remote-tracking branch 'upstream/main' into tests
phofl Nov 19, 2023
bc39314
Merge remote-tracking branch 'origin/setitem_index_inference_string_dtype'…
phofl Nov 19, 2023
c4d9ba9
Finish
phofl Nov 20, 2023
ff4bd23
Merge remote-tracking branch 'upstream/main' into indexing_negative_steps
phofl Nov 21, 2023
06b4f89
Update
phofl Nov 21, 2023
96bcdb7
Merge remote-tracking branch 'upstream/main' into tests
phofl Nov 21, 2023
4d56f5b
Merge branch 'indexing_negative_steps' into tests
phofl Nov 21, 2023
acb37d8
Merge remote-tracking branch 'upstream/main' into tests
phofl Nov 21, 2023
dab0649
Merge branch 'main' into tests
phofl Nov 22, 2023
20a1049
Merge branch 'main' into tests
phofl Nov 29, 2023
02adc20
Update test_base.py
phofl Nov 30, 2023
cf28369
Update test_old_base.py
phofl Nov 30, 2023
94cf908
Update conftest.py
phofl Nov 30, 2023
671a353
Update conftest.py
phofl Nov 30, 2023
77f4711
Update test_old_base.py
phofl Nov 30, 2023
aa1a6cf
Update
phofl Nov 30, 2023
bd5e305
Update test_setops.py
phofl Nov 30, 2023
8673707
Merge branch 'main' into tests
phofl Dec 8, 2023
746b504
Fix pre-commit
phofl Dec 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1903,7 +1903,7 @@ def using_copy_on_write() -> bool:
@pytest.fixture
def warn_copy_on_write() -> bool:
"""
Fixture to check if Copy-on-Write is enabled.
Fixture to check if Copy-on-Write is in warning mode.
"""
return (
pd.options.mode.copy_on_write == "warn"
Expand All @@ -1914,9 +1914,9 @@ def warn_copy_on_write() -> bool:
@pytest.fixture
def using_infer_string() -> bool:
"""
Fixture to check if infer_string is enabled.
Fixture to check if infer string option is enabled.
"""
return pd.options.future.infer_string
return pd.options.future.infer_string is True


warsaws = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/indexes/base_class/test_formats.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
import pandas._config.config as cf

from pandas import Index
Expand All @@ -15,6 +16,7 @@ def test_repr_is_valid_construction_code(self):
res = eval(repr(idx))
tm.assert_index_equal(res, idx)

@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
@pytest.mark.parametrize(
"index,expected",
[
Expand Down Expand Up @@ -79,6 +81,7 @@ def test_string_index_repr(self, index, expected):
result = repr(index)
assert result == expected

@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
@pytest.mark.parametrize(
"index,expected",
[
Expand Down
10 changes: 6 additions & 4 deletions pandas/tests/indexes/base_class/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ def test_insert(self):

# test empty
null_index = Index([])
tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a"))
tm.assert_index_equal(Index(["a"], dtype=object), null_index.insert(0, "a"))

def test_insert_missing(self, nulls_fixture):
def test_insert_missing(self, nulls_fixture, using_infer_string):
# GH#22295
# test there is no mangling of NA values
expected = Index(["a", nulls_fixture, "b", "c"])
result = Index(list("abc")).insert(1, nulls_fixture)
expected = Index(["a", nulls_fixture, "b", "c"], dtype=object)
result = Index(list("abc"), dtype=object).insert(
1, Index([nulls_fixture], dtype=object)
)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/base_class/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def test_intersection_str_dates(self, sort):
def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
# non-monotonic non-unique
index1 = Index(["A", "B", "A", "C"])
expected = Index(expected_arr, dtype="object")
expected = Index(expected_arr)
result = index1.intersection(index2, sort=sort)
if sort is None:
expected = expected.sort_values()
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/categorical/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def test_astype(self):
ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

result = ci.astype(object)
tm.assert_index_equal(result, Index(np.array(ci)))
tm.assert_index_equal(result, Index(np.array(ci), dtype=object))

# this IS equal, but not the same class
assert result.equals(ci)
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/indexes/categorical/test_category.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

from pandas._libs import index as libindex
from pandas._libs.arrays import NDArrayBacked

Expand Down Expand Up @@ -47,7 +49,7 @@ def test_insert(self, simple_index):

# invalid -> cast to object
expected = ci.astype(object).insert(0, "d")
result = ci.insert(0, "d")
result = ci.insert(0, "d").astype(object)
tm.assert_index_equal(result, expected, exact=True)

# GH 18295 (test missing)
Expand Down Expand Up @@ -194,6 +196,7 @@ def test_unique(self, data, categories, expected_data, ordered):
expected = CategoricalIndex(expected_data, dtype=dtype)
tm.assert_index_equal(idx.unique(), expected)

@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr doesn't roundtrip")
def test_repr_roundtrip(self):
ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
str(ci)
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/indexes/categorical/test_formats.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""
Tests for CategoricalIndex.__repr__ and related methods.
"""
import pytest

from pandas._config import using_pyarrow_string_dtype
import pandas._config.config as cf

from pandas import CategoricalIndex
Expand All @@ -16,6 +19,7 @@ def test_format_different_scalar_lengths(self):
with tm.assert_produces_warning(FutureWarning, match=msg):
assert idx.format() == expected

@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
def test_string_categorical_index_repr(self):
# short
idx = CategoricalIndex(["a", "bb", "ccc"])
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/categorical/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def test_reindex_duplicate_target(self):
# See GH25459
cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
res, indexer = cat.reindex(["a", "c", "c"])
exp = Index(["a", "c", "c"], dtype="object")
exp = Index(["a", "c", "c"])
tm.assert_index_equal(res, exp, exact=True)
tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/datetimes/methods/test_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def test_map(self):

f = lambda x: x.strftime("%Y%m%d")
result = rng.map(f)
exp = Index([f(x) for x in rng], dtype="<U8")
exp = Index([f(x) for x in rng])
tm.assert_index_equal(result, exp)

def test_map_fallthrough(self, capsys):
Expand Down
7 changes: 6 additions & 1 deletion pandas/tests/indexes/interval/test_formats.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype

from pandas import (
DataFrame,
DatetimeIndex,
Expand Down Expand Up @@ -31,13 +33,16 @@ class TestIntervalIndexRendering:
(DataFrame, (" 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c")),
],
)
def test_repr_missing(self, constructor, expected):
def test_repr_missing(self, constructor, expected, using_infer_string, request):
# GH 25984
if using_infer_string and constructor is Series:
request.applymarker(pytest.mark.xfail(reason="repr different"))
index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
obj = constructor(list("abc"), index=index)
result = repr(obj)
assert result == expected

@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="repr different")
def test_repr_floats(self):
# GH 32553

Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/indexes/multi/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -847,11 +847,14 @@ def test_multiindex_inference_consistency():
assert lev.dtype == object


def test_dtype_representation():
def test_dtype_representation(using_infer_string):
# GH#46900
pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")])
result = pmidx.dtypes
exp = "object" if not using_infer_string else "string"
expected = Series(
["int64", "object"], index=MultiIndex.from_tuples([("a", "b"), ("c", "d")])
["int64", exp],
index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]),
dtype=object,
)
tm.assert_series_equal(result, expected)
16 changes: 10 additions & 6 deletions pandas/tests/indexes/multi/test_get_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,25 @@ def test_get_level_number_integer(idx):
idx._get_level_number("fourth")


def test_get_dtypes():
def test_get_dtypes(using_infer_string):
# Test MultiIndex.dtypes (# Gh37062)
idx_multitype = MultiIndex.from_product(
[[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")],
names=["int", "string", "dt"],
)

exp = "object" if not using_infer_string else "string"
expected = pd.Series(
{
"int": np.dtype("int64"),
"string": np.dtype("O"),
"string": exp,
"dt": DatetimeTZDtype(tz="utc"),
}
)
tm.assert_series_equal(expected, idx_multitype.dtypes)


def test_get_dtypes_no_level_name():
def test_get_dtypes_no_level_name(using_infer_string):
# Test MultiIndex.dtypes (# GH38580 )
idx_multitype = MultiIndex.from_product(
[
Expand All @@ -59,17 +61,18 @@ def test_get_dtypes_no_level_name():
pd.date_range("20200101", periods=2, tz="UTC"),
],
)
exp = "object" if not using_infer_string else "string"
expected = pd.Series(
{
"level_0": np.dtype("int64"),
"level_1": np.dtype("O"),
"level_1": exp,
"level_2": DatetimeTZDtype(tz="utc"),
}
)
tm.assert_series_equal(expected, idx_multitype.dtypes)


def test_get_dtypes_duplicate_level_names():
def test_get_dtypes_duplicate_level_names(using_infer_string):
# Test MultiIndex.dtypes with non-unique level names (# GH45174)
result = MultiIndex.from_product(
[
Expand All @@ -79,8 +82,9 @@ def test_get_dtypes_duplicate_level_names():
],
names=["A", "A", "A"],
).dtypes
exp = "object" if not using_infer_string else "string"
expected = pd.Series(
[np.dtype("int64"), np.dtype("O"), DatetimeTZDtype(tz="utc")],
[np.dtype("int64"), exp, DatetimeTZDtype(tz="utc")],
index=["A", "A", "A"],
)
tm.assert_series_equal(result, expected)
Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/indexes/multi/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,14 @@ def test_reindex_lvl_preserves_names_when_target_is_list_or_array():
assert idx.reindex([], level=1)[0].names == ["foo", "bar"]


def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array():
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(
using_infer_string,
):
# GH7774
idx = MultiIndex.from_product([[0, 1], ["a", "b"]])
assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64
assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_
exp = np.object_ if not using_infer_string else str
assert idx.reindex([], level=1)[0].levels[1].dtype.type == exp

# case with EA levels
cat = pd.Categorical(["foo", "bar"])
Expand Down
25 changes: 17 additions & 8 deletions pandas/tests/indexes/multi/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,19 +263,23 @@ def test_union(idx, sort):
assert result.equals(idx)


def test_union_with_regular_index(idx):
def test_union_with_regular_index(idx, using_infer_string):
other = Index(["A", "B", "C"])

result = other.union(idx)
assert ("foo", "one") in result
assert "B" in result

msg = "The values in the array are unorderable"
with tm.assert_produces_warning(RuntimeWarning, match=msg):
result2 = idx.union(other)
# This is more consistent now, if sorting fails then we don't sort at all
# in the MultiIndex case.
assert not result.equals(result2)
if using_infer_string:
with pytest.raises(NotImplementedError, match="Can only union"):
idx.union(other)
else:
msg = "The values in the array are unorderable"
with tm.assert_produces_warning(RuntimeWarning, match=msg):
result2 = idx.union(other)
# This is more consistent now, if sorting fails then we don't sort at all
# in the MultiIndex case.
assert not result.equals(result2)


def test_intersection(idx, sort):
Expand Down Expand Up @@ -756,7 +760,12 @@ def test_intersection_keep_ea_dtypes(val, any_numeric_ea_dtype):

def test_union_with_na_when_constructing_dataframe():
# GH43222
series1 = Series((1,), index=MultiIndex.from_tuples(((None, None),)))
series1 = Series(
(1,),
index=MultiIndex.from_arrays(
[Series([None], dtype="string"), Series([None], dtype="string")]
),
)
series2 = Series((10, 20), index=MultiIndex.from_tuples(((None, None), ("a", "b"))))
result = DataFrame([series1, series2])
expected = DataFrame({(np.nan, np.nan): [1.0, 10.0], ("a", "b"): [np.nan, 20.0]})
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/object/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_astype_str_from_bytes():

# while we're here, check that Series.astype behaves the same
result = Series(idx).astype(str)
expected = Series(expected)
expected = Series(expected, dtype=object)
tm.assert_series_equal(result, expected)


Expand Down
Loading