pandas-dev · mroeschke · Dec 14, 2023 · Dec 14, 2023 · Dec 14, 2023
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -1071,7 +1071,7 @@ def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray:
             fill_value = Index(self._left, copy=False)._na_value
             empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1))
         else:
-            empty = self._from_sequence([fill_value] * empty_len)
+            empty = self._from_sequence([fill_value] * empty_len, dtype=self.dtype)
 
         if periods > 0:
             a = empty

diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
@@ -150,7 +150,7 @@ def __len__(self) -> int:
         return len(self._pa_array)
 
     @classmethod
-    def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False):
+    def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
         from pandas.core.arrays.masked import BaseMaskedArray
 
         _chk_pyarrow_available()

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -52,7 +52,6 @@
     ensure_object,
     is_bool,
     is_bool_dtype,
-    is_extension_array_dtype,
     is_float_dtype,
     is_integer,
     is_integer_dtype,
@@ -1385,20 +1384,22 @@ def _maybe_coerce_merge_keys(self) -> None:
                 if lk.dtype.kind == rk.dtype.kind:
                     continue
 
-                if is_extension_array_dtype(lk.dtype) and not is_extension_array_dtype(
-                    rk.dtype
+                if isinstance(lk.dtype, ExtensionDtype) and not isinstance(
+                    rk.dtype, ExtensionDtype
                 ):
                     ct = find_common_type([lk.dtype, rk.dtype])
-                    if is_extension_array_dtype(ct):
-                        rk = ct.construct_array_type()._from_sequence(rk)  # type: ignore[union-attr]
+                    if isinstance(ct, ExtensionDtype):
+                        com_cls = ct.construct_array_type()
+                        rk = com_cls._from_sequence(rk, dtype=ct, copy=False)
                     else:
-                        rk = rk.astype(ct)  # type: ignore[arg-type]
-                elif is_extension_array_dtype(rk.dtype):
+                        rk = rk.astype(ct)
+                elif isinstance(rk.dtype, ExtensionDtype):
                     ct = find_common_type([lk.dtype, rk.dtype])
-                    if is_extension_array_dtype(ct):
-                        lk = ct.construct_array_type()._from_sequence(lk)  # type: ignore[union-attr]
+                    if isinstance(ct, ExtensionDtype):
+                        com_cls = ct.construct_array_type()
+                        lk = com_cls._from_sequence(lk, dtype=ct, copy=False)
                     else:
-                        lk = lk.astype(ct)  # type: ignore[arg-type]
+                        lk = lk.astype(ct)
 
                 # check whether ints and floats
                 if is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype):
@@ -2500,15 +2501,15 @@ def _convert_arrays_and_get_rizer_klass(
                 if not isinstance(lk, ExtensionArray):
                     lk = cls._from_sequence(lk, dtype=dtype, copy=False)
                 else:
-                    lk = lk.astype(dtype)
+                    lk = lk.astype(dtype, copy=False)
 
                 if not isinstance(rk, ExtensionArray):
                     rk = cls._from_sequence(rk, dtype=dtype, copy=False)
                 else:
-                    rk = rk.astype(dtype)
+                    rk = rk.astype(dtype, copy=False)
             else:
-                lk = lk.astype(dtype)
-                rk = rk.astype(dtype)
+                lk = lk.astype(dtype, copy=False)
+                rk = rk.astype(dtype, copy=False)
         if isinstance(lk, BaseMaskedArray):
             #  Invalid index type "type" for "Dict[Type[object], Type[Factorizer]]";
             #  expected type "Type[object]"

diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py
@@ -242,7 +242,8 @@ def test_coerce_to_numpy_array():
 
 def test_to_boolean_array_from_strings():
     result = BooleanArray._from_sequence_of_strings(
-        np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object)
+        np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object),
+        dtype="boolean",
     )
     expected = BooleanArray(
         np.array([True, False, True, True, False, False, False]),
@@ -254,7 +255,7 @@ def test_to_boolean_array_from_strings():
 
 def test_to_boolean_array_from_strings_invalid_string():
     with pytest.raises(ValueError, match="cannot be cast"):
-        BooleanArray._from_sequence_of_strings(["donkey"])
+        BooleanArray._from_sequence_of_strings(["donkey"], dtype="boolean")
 
 
 @pytest.mark.parametrize("box", [True, False], ids=["series", "array"])

diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
@@ -755,12 +755,12 @@ def test_categorical_extension_array_nullable(self, nulls_fixture):
 
     def test_from_sequence_copy(self):
         cat = Categorical(np.arange(5).repeat(2))
-        result = Categorical._from_sequence(cat, dtype=None, copy=False)
+        result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=False)
 
         # more generally, we'd be OK with a view
         assert result._codes is cat._codes
 
-        result = Categorical._from_sequence(cat, dtype=None, copy=True)
+        result = Categorical._from_sequence(cat, dtype=cat.dtype, copy=True)
 
         assert not tm.shares_memory(result, cat)
 

diff --git a/pandas/tests/arrays/datetimes/test_cumulative.py b/pandas/tests/arrays/datetimes/test_cumulative.py
@@ -26,6 +26,7 @@ def test_accumulators_freq(self):
                 "2000-01-02",
                 "2000-01-03",
             ],
+            dtype="M8[ns]",
         )
         tm.assert_datetime_array_equal(result, expected)
 

diff --git a/pandas/tests/arrays/integer/test_construction.py b/pandas/tests/arrays/integer/test_construction.py
@@ -175,32 +175,34 @@ def test_to_integer_array_dtype_keyword(constructor):
 
 
 def test_to_integer_array_float():
-    result = IntegerArray._from_sequence([1.0, 2.0])
+    result = IntegerArray._from_sequence([1.0, 2.0], dtype="Int64")
     expected = pd.array([1, 2], dtype="Int64")
     tm.assert_extension_array_equal(result, expected)
 
     with pytest.raises(TypeError, match="cannot safely cast non-equivalent"):
-        IntegerArray._from_sequence([1.5, 2.0])
+        IntegerArray._from_sequence([1.5, 2.0], dtype="Int64")
 
     # for float dtypes, the itemsize is not preserved
-    result = IntegerArray._from_sequence(np.array([1.0, 2.0], dtype="float32"))
+    result = IntegerArray._from_sequence(
+        np.array([1.0, 2.0], dtype="float32"), dtype="Int64"
+    )
     assert result.dtype == Int64Dtype()
 
 
 def test_to_integer_array_str():
-    result = IntegerArray._from_sequence(["1", "2", None])
+    result = IntegerArray._from_sequence(["1", "2", None], dtype="Int64")
     expected = pd.array([1, 2, np.nan], dtype="Int64")
     tm.assert_extension_array_equal(result, expected)
 
     with pytest.raises(
         ValueError, match=r"invalid literal for int\(\) with base 10: .*"
     ):
-        IntegerArray._from_sequence(["1", "2", ""])
+        IntegerArray._from_sequence(["1", "2", ""], dtype="Int64")
 
     with pytest.raises(
         ValueError, match=r"invalid literal for int\(\) with base 10: .*"
     ):
-        IntegerArray._from_sequence(["1.5", "2.0"])
+        IntegerArray._from_sequence(["1.5", "2.0"], dtype="Int64")
 
 
 @pytest.mark.parametrize(

diff --git a/pandas/tests/arrays/test_array.py b/pandas/tests/arrays/test_array.py
@@ -60,7 +60,11 @@ def test_dt64_array(dtype_unit):
             None,
             NumpyExtensionArray(np.array([], dtype=object)),
         ),
-        (np.array([1, 2], dtype="int64"), None, IntegerArray._from_sequence([1, 2])),
+        (
+            np.array([1, 2], dtype="int64"),
+            None,
+            IntegerArray._from_sequence([1, 2], dtype="Int64"),
+        ),
         (
             np.array([1.0, 2.0], dtype="float64"),
             None,
@@ -284,7 +288,7 @@ def test_array_copy():
         # datetime
         (
             [pd.Timestamp("2000"), pd.Timestamp("2001")],
-            DatetimeArray._from_sequence(["2000", "2001"]),
+            DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
         ),
         (
             [datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)],
@@ -319,7 +323,7 @@ def test_array_copy():
         # timedelta
         (
             [pd.Timedelta("1h"), pd.Timedelta("2h")],
-            TimedeltaArray._from_sequence(["1h", "2h"]),
+            TimedeltaArray._from_sequence(["1h", "2h"], dtype="m8[ns]"),
         ),
         (
             np.array([1, 2], dtype="m8[ns]"),
@@ -330,35 +334,42 @@ def test_array_copy():
             TimedeltaArray(np.array([1, 2], dtype="m8[us]")),
         ),
         # integer
-        ([1, 2], IntegerArray._from_sequence([1, 2])),
-        ([1, None], IntegerArray._from_sequence([1, None])),
-        ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA])),
-        ([1, np.nan], IntegerArray._from_sequence([1, np.nan])),
+        ([1, 2], IntegerArray._from_sequence([1, 2], dtype="Int64")),
+        ([1, None], IntegerArray._from_sequence([1, None], dtype="Int64")),
+        ([1, pd.NA], IntegerArray._from_sequence([1, pd.NA], dtype="Int64")),
+        ([1, np.nan], IntegerArray._from_sequence([1, np.nan], dtype="Int64")),
         # float
-        ([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2])),
-        ([0.1, None], FloatingArray._from_sequence([0.1, pd.NA])),
-        ([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA])),
-        ([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA])),
+        ([0.1, 0.2], FloatingArray._from_sequence([0.1, 0.2], dtype="Float64")),
+        ([0.1, None], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
+        ([0.1, np.nan], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
+        ([0.1, pd.NA], FloatingArray._from_sequence([0.1, pd.NA], dtype="Float64")),
         # integer-like float
-        ([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0])),
-        ([1.0, None], FloatingArray._from_sequence([1.0, pd.NA])),
-        ([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA])),
-        ([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA])),
+        ([1.0, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
+        ([1.0, None], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
+        ([1.0, np.nan], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
+        ([1.0, pd.NA], FloatingArray._from_sequence([1.0, pd.NA], dtype="Float64")),
         # mixed-integer-float
-        ([1, 2.0], FloatingArray._from_sequence([1.0, 2.0])),
-        ([1, np.nan, 2.0], FloatingArray._from_sequence([1.0, None, 2.0])),
+        ([1, 2.0], FloatingArray._from_sequence([1.0, 2.0], dtype="Float64")),
+        (
+            [1, np.nan, 2.0],
+            FloatingArray._from_sequence([1.0, None, 2.0], dtype="Float64"),
+        ),
         # string
         (
             ["a", "b"],
-            pd.StringDtype().construct_array_type()._from_sequence(["a", "b"]),
+            pd.StringDtype()
+            .construct_array_type()
+            ._from_sequence(["a", "b"], dtype=pd.StringDtype()),
         ),
         (
             ["a", None],
-            pd.StringDtype().construct_array_type()._from_sequence(["a", None]),
+            pd.StringDtype()
+            .construct_array_type()
+            ._from_sequence(["a", None], dtype=pd.StringDtype()),
         ),
         # Boolean
-        ([True, False], BooleanArray._from_sequence([True, False])),
-        ([True, None], BooleanArray._from_sequence([True, None])),
+        ([True, False], BooleanArray._from_sequence([True, False], dtype="boolean")),
+        ([True, None], BooleanArray._from_sequence([True, None], dtype="boolean")),
     ],
 )
 def test_array_inference(data, expected):

diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py
@@ -18,7 +18,7 @@ def test_from_sequence_from_cls(self, data):
 
     def test_array_from_scalars(self, data):
         scalars = [data[0], data[1], data[2]]
-        result = data._from_sequence(scalars)
+        result = data._from_sequence(scalars, dtype=data.dtype)
         assert isinstance(result, type(data))
 
     def test_series_constructor(self, data):

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -263,7 +263,7 @@ def test_duplicated(self, data, keep):
     @pytest.mark.parametrize("box", [pd.Series, lambda x: x])
     @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique])
     def test_unique(self, data, box, method):
-        duplicated = box(data._from_sequence([data[0], data[0]]))
+        duplicated = box(data._from_sequence([data[0], data[0]], dtype=data.dtype))
 
         result = method(duplicated)
 

diff --git a/pandas/tests/extension/list/array.py b/pandas/tests/extension/list/array.py
@@ -54,7 +54,7 @@ def __init__(self, values, dtype=None, copy=False) -> None:
         self.data = values
 
     @classmethod
-    def _from_sequence(cls, scalars, dtype=None, copy=False):
+    def _from_sequence(cls, scalars, *, dtype=None, copy=False):
         data = np.empty(len(scalars), dtype=object)
         data[:] = scalars
         return cls(data)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -294,11 +294,13 @@ def test_from_dtype(self, data, request):
     def test_from_sequence_pa_array(self, data):
         # https://github.com/pandas-dev/pandas/pull/47034#discussion_r955500784
         # data._pa_array = pa.ChunkedArray
-        result = type(data)._from_sequence(data._pa_array)
+        result = type(data)._from_sequence(data._pa_array, dtype=data.dtype)
         tm.assert_extension_array_equal(result, data)
         assert isinstance(result._pa_array, pa.ChunkedArray)
 
-        result = type(data)._from_sequence(data._pa_array.combine_chunks())
+        result = type(data)._from_sequence(
+            data._pa_array.combine_chunks(), dtype=data.dtype
+        )
         tm.assert_extension_array_equal(result, data)
         assert isinstance(result._pa_array, pa.ChunkedArray)
 

diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py
@@ -31,7 +31,7 @@ def test_array_of_dt64_nat_raises(self):
             TimedeltaIndex(arr)
 
         with pytest.raises(TypeError, match=msg):
-            TimedeltaArray._from_sequence(arr)
+            TimedeltaArray._from_sequence(arr, dtype="m8[ns]")
 
         with pytest.raises(TypeError, match=msg):
             to_timedelta(arr)

diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py
@@ -98,7 +98,7 @@ def test_to_timedelta_oob_non_nano(self):
             TimedeltaIndex(arr)
 
         with pytest.raises(OutOfBoundsTimedelta, match=msg):
-            TimedeltaArray._from_sequence(arr)
+            TimedeltaArray._from_sequence(arr, dtype="m8[s]")
 
     @pytest.mark.parametrize(
         "arg", [np.arange(10).reshape(2, 5), pd.DataFrame(np.arange(10).reshape(2, 5))]