From c3070749e30d349b0d14e63de30eb37d99324860 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 30 Oct 2024 10:29:37 +0100 Subject: [PATCH 1/3] TST (string dtype): add explicit object vs str dtype to index fixture (#60116) (cherry picked from commit 7bd594c81acb5f6428e9ef54ba5a9da1f2860a89) --- pandas/conftest.py | 3 ++- pandas/tests/indexes/test_any_index.py | 2 +- pandas/tests/indexes/test_old_base.py | 2 +- pandas/tests/indexes/test_setops.py | 8 +++++++- pandas/tests/test_algos.py | 1 + 5 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index c6237d0309630..b0818b11ab037 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -615,7 +615,8 @@ def _create_mi_with_dt64tz_level(): indices_dict = { - "string": Index([f"pandas_{i}" for i in range(100)]), + "object": Index([f"pandas_{i}" for i in range(100)], dtype=object), + "string": Index([f"pandas_{i}" for i in range(100)], dtype="str"), "datetime": date_range("2020-01-01", periods=100), "datetime-tz": date_range("2020-01-01", periods=100, tz="US/Pacific"), "period": period_range("2020-01-01", periods=100, freq="D"), diff --git a/pandas/tests/indexes/test_any_index.py b/pandas/tests/indexes/test_any_index.py index 10204cfb78e89..8edeaf9c16083 100644 --- a/pandas/tests/indexes/test_any_index.py +++ b/pandas/tests/indexes/test_any_index.py @@ -45,7 +45,7 @@ def test_map_identity_mapping(index, request): # GH#12766 result = index.map(lambda x: x) - if index.dtype == object and result.dtype == bool: + if index.dtype == object and result.dtype in [bool, "string"]: assert (index == result).all() # TODO: could work that into the 'exact="equiv"'? return # FIXME: doesn't belong in this file anymore! diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 176bf893cafa8..10fdf367812de 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -260,7 +260,7 @@ def test_ensure_copied_data(self, index): "RangeIndex cannot be initialized from data, " "MultiIndex and CategoricalIndex are tested separately" ) - elif index.dtype == object and index.inferred_type == "boolean": + elif index.dtype == object and index.inferred_type in ["boolean", "string"]: init_kwargs["dtype"] = index.dtype index_type = type(index) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 72c3396f124b8..3845744dc0717 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -293,7 +293,13 @@ def test_difference_base(self, sort, index): first.difference([1, 2, 3], sort) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") - def test_symmetric_difference(self, index): + def test_symmetric_difference(self, index, using_infer_string, request): + if ( + using_infer_string + and index.dtype == "object" + and index.inferred_type == "string" + ): + request.applymarker(pytest.mark.xfail(reason="TODO: infer_string")) if isinstance(index, CategoricalIndex): pytest.skip(f"Not relevant for {type(index).__name__}") if len(index) < 2: diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index a7c2ec5acb7c2..97d6415e0de05 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -65,6 +65,7 @@ def test_factorize_complex(self): expected_uniques = np.array([(1 + 0j), (2 + 0j), (2 + 1j)], dtype=object) tm.assert_numpy_array_equal(uniques, expected_uniques) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("sort", [True, False]) def test_factorize(self, index_or_series_obj, sort): obj = index_or_series_obj From 7939b9a8a5a0f157f6db9779738318c220fff11a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 4 Nov 2024 07:31:13 +0100 Subject: [PATCH 2/3] suppress dtype inference warnings --- pandas/tests/base/test_misc.py | 1 + pandas/tests/indexes/test_common.py | 1 + pandas/tests/indexes/test_old_base.py | 1 + pandas/tests/series/methods/test_map.py | 1 + 4 files changed, 4 insertions(+) diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index b42e01c76335c..1bf0a8d75dd4f 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -165,6 +165,7 @@ def test_searchsorted(request, index_or_series_obj): assert 0 <= index <= len(obj) +@pytest.mark.filterwarnings(r"ignore:Dtype inference:FutureWarning") def test_access_by_position(index_flat): index = index_flat diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 05b2aa584674c..c08fcdaedbefe 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -147,6 +147,7 @@ def test_copy_and_deepcopy(self, index_flat): new_copy = index.copy(deep=True, name="banana") assert new_copy.name == "banana" + @pytest.mark.filterwarnings(r"ignore:Dtype inference:FutureWarning") def test_copy_name(self, index_flat): # GH#12309: Check that the "name" argument # passed at initialization is honored. diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 10fdf367812de..2f6bdb1fd8969 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -485,6 +485,7 @@ def test_delete_base(self, index): with pytest.raises(IndexError, match=msg): index.delete(length) + @pytest.mark.filterwarnings(r"ignore:Dtype inference:FutureWarning") @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") def test_equals(self, index): if isinstance(index, IntervalIndex): diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index e5281a18236da..f33f5edb5ee66 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -221,6 +221,7 @@ def test_map_category_string(): tm.assert_series_equal(a.map(c), exp) +@pytest.mark.filterwarnings(r"ignore:Dtype inference:FutureWarning") def test_map_empty(request, index): if isinstance(index, MultiIndex): request.applymarker( From 029c1802df330f1a0e47a5c76af6c511364130a9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 4 Nov 2024 08:45:51 +0100 Subject: [PATCH 3/3] fix index dtype in value_counts result --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 56600bd9a5107..085a4ee41dcc9 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -931,7 +931,7 @@ def value_counts_internal( # For backwards compatibility, we let Index do its normal type # inference, _except_ for if if infers from object to bool. idx = Index(keys) - if idx.dtype == bool and keys.dtype == object: + if idx.dtype in [bool, "string"] and keys.dtype == object: idx = idx.astype(object) elif ( idx.dtype != keys.dtype # noqa: PLR1714 # # pylint: disable=R1714