TST (string): copy/view tests (#59702)

jbrockmendel · jorisvandenbossche · commit 77a2326d924f · 2024-10-10T15:04:52.000+02:00
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -453,7 +453,7 @@ def __eq__(self, other: object) -> bool:
                 # Because left and right have the same length and are unique,
                 #  `indexer` not having any -1s implies that there is a
                 #  bijection between `left` and `right`.
-                return (indexer != -1).all()
+                return bool((indexer != -1).all())
 
             # With object-dtype we need a comparison that identifies
             #  e.g. int(2) as distinct from float(2)
diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py
@@ -1,8 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -285,10 +283,9 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype):
     assert np.shares_memory(arr_before, arr_after)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize("cons", [Series, Index])
 @pytest.mark.parametrize(
-    "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)]
+    "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], object)]
 )
 def test_dataframe_from_series_or_index(
     using_copy_on_write, warn_copy_on_write, data, dtype, cons
diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py
@@ -16,10 +16,9 @@
 from pandas.tests.copy_view.util import get_array
 
 
-@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
 def test_concat_frames(using_copy_on_write):
-    df = DataFrame({"b": ["a"] * 3})
-    df2 = DataFrame({"a": ["a"] * 3})
+    df = DataFrame({"b": ["a"] * 3}, dtype=object)
+    df2 = DataFrame({"a": ["a"] * 3}, dtype=object)
     df_orig = df.copy()
     result = concat([df, df2], axis=1)
 
@@ -41,10 +40,9 @@ def test_concat_frames(using_copy_on_write):
     tm.assert_frame_equal(df, df_orig)
 
 
-@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
 def test_concat_frames_updating_input(using_copy_on_write):
-    df = DataFrame({"b": ["a"] * 3})
-    df2 = DataFrame({"a": ["a"] * 3})
+    df = DataFrame({"b": ["a"] * 3}, dtype=object)
+    df2 = DataFrame({"a": ["a"] * 3}, dtype=object)
     result = concat([df, df2], axis=1)
 
     if using_copy_on_write:
@@ -203,7 +201,7 @@ def test_concat_copy_keyword(using_copy_on_write, copy):
         assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
 
 
-@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
+# @pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
 @pytest.mark.parametrize(
     "func",
     [
@@ -212,8 +210,8 @@ def test_concat_copy_keyword(using_copy_on_write, copy):
     ],
 )
 def test_merge_on_key(using_copy_on_write, func):
-    df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
-    df2 = DataFrame({"key": ["a", "b", "c"], "b": [4, 5, 6]})
+    df1 = DataFrame({"key": Series(["a", "b", "c"], dtype=object), "a": [1, 2, 3]})
+    df2 = DataFrame({"key": Series(["a", "b", "c"], dtype=object), "b": [4, 5, 6]})
     df1_orig = df1.copy()
     df2_orig = df2.copy()
 
@@ -267,7 +265,6 @@ def test_merge_on_index(using_copy_on_write):
     tm.assert_frame_equal(df2, df2_orig)
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize(
     "func, how",
     [
@@ -276,8 +273,8 @@ def test_merge_on_index(using_copy_on_write):
     ],
 )
 def test_merge_on_key_enlarging_one(using_copy_on_write, func, how):
-    df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
-    df2 = DataFrame({"key": ["a", "b"], "b": [4, 5]})
+    df1 = DataFrame({"key": Series(["a", "b", "c"], dtype=object), "a": [1, 2, 3]})
+    df2 = DataFrame({"key": Series(["a", "b"], dtype=object), "b": [4, 5]})
     df1_orig = df1.copy()
     df2_orig = df2.copy()
 
@@ -321,9 +318,13 @@ def test_merge_copy_keyword(using_copy_on_write, copy):
         assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
 
 
-@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
+@pytest.mark.xfail(
+    using_string_dtype() and HAS_PYARROW,
+    reason="TODO(infer_string); result.index infers str dtype while both "
+    "df1 and df2 index are object.",
+)
 def test_join_on_key(using_copy_on_write):
-    df_index = Index(["a", "b", "c"], name="key")
+    df_index = Index(["a", "b", "c"], name="key", dtype=object)
 
     df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True))
     df2 = DataFrame({"b": [4, 5, 6]}, index=df_index.copy(deep=True))
@@ -355,9 +356,8 @@ def test_join_on_key(using_copy_on_write):
     tm.assert_frame_equal(df2, df2_orig)
 
 
-@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
 def test_join_multiple_dataframes_on_key(using_copy_on_write):
-    df_index = Index(["a", "b", "c"], name="key")
+    df_index = Index(["a", "b", "c"], name="key", dtype=object)
 
     df1 = DataFrame({"a": [1, 2, 3]}, index=df_index.copy(deep=True))
     dfs_list = [
diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py
@@ -1,12 +1,13 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_string_dtype
-
 import pandas.util._test_decorators as td
 
 import pandas as pd
-from pandas import DataFrame
+from pandas import (
+    DataFrame,
+    Series,
+)
 import pandas._testing as tm
 from pandas.tests.copy_view.util import get_array
 
@@ -78,7 +79,6 @@ def test_switch_options():
 
 
 @td.skip_array_manager_invalid_test
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize("dtype", [np.intp, np.int8])
 @pytest.mark.parametrize(
     "locs, arr",
@@ -105,7 +105,7 @@ def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype):
             "c": [7, 8, 9],
             "d": [10, 11, 12],
             "e": [13, 14, 15],
-            "f": ["a", "b", "c"],
+            "f": Series(["a", "b", "c"], dtype=object),
         },
     )
     arr = arr.astype(dtype)
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
@@ -5,8 +5,6 @@
 import pytest
 import pytz
 
-from pandas._config import using_string_dtype
-
 from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
 
 from pandas.core.dtypes.base import _registry as registry
@@ -961,7 +959,6 @@ def test_same_categories_different_order(self):
         c2 = CategoricalDtype(["b", "a"], ordered=True)
         assert c1 is not c2
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.parametrize("ordered1", [True, False, None])
     @pytest.mark.parametrize("ordered2", [True, False, None])
     def test_categorical_equality(self, ordered1, ordered2):