From 61983d8747fd589f07fb3a1945a9af94c3850905 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 23 Feb 2021 12:47:21 +0100
Subject: [PATCH 01/12] [ArrayManager] DataFrame constructors

---
 pandas/core/frame.py                          | 60 +++++++++++++------
 pandas/core/generic.py                        |  2 +
 pandas/core/internals/construction.py         | 39 +++++++-----
 .../frame/constructors/test_from_records.py   | 10 +++-
 4 files changed, 77 insertions(+), 34 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 131a96d10a6d0..2de39dc7c4330 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -563,29 +563,41 @@ def __init__(
         if isinstance(data, DataFrame):
             data = data._mgr
 
-        if isinstance(data, (BlockManager, ArrayManager)):
-            if index is None and columns is None and dtype is None and copy is False:
-                # GH#33357 fastpath
-                NDFrame.__init__(self, data)
-                return
+        if (
+            index is None
+            and columns is None
+            and dtype is None
+            and copy is False
+            and isinstance(data, (BlockManager, ArrayManager))
+        ):
+            # GH#33357 fastpath
+            NDFrame.__init__(self, data)
+            return
 
+        manager = get_option("mode.data_manager")
+
+        if isinstance(data, (BlockManager, ArrayManager)):
             mgr = self._init_mgr(
                 data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy
             )
 
         elif isinstance(data, dict):
-            mgr = init_dict(data, index, columns, dtype=dtype)
+            mgr = init_dict(data, index, columns, dtype=dtype, typ=manager)
         elif isinstance(data, ma.MaskedArray):
             import numpy.ma.mrecords as mrecords
 
             # masked recarray
             if isinstance(data, mrecords.MaskedRecords):
-                mgr = masked_rec_array_to_mgr(data, index, columns, dtype, copy)
+                mgr = masked_rec_array_to_mgr(
+                    data, index, columns, dtype, copy, typ=manager
+                )
 
             # a masked array
             else:
                 data = sanitize_masked_array(data)
-                mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
+                mgr = init_ndarray(
+                    data, index, columns, dtype=dtype, copy=copy, typ=manager
+                )
 
         elif isinstance(data, (np.ndarray, Series, Index)):
             if data.dtype.names:
@@ -593,11 +605,15 @@ def __init__(
                 data = {k: data[k] for k in data_columns}
                 if columns is None:
                     columns = data_columns
-                mgr = init_dict(data, index, columns, dtype=dtype)
+                mgr = init_dict(data, index, columns, dtype=dtype, typ=manager)
             elif getattr(data, "name", None) is not None:
-                mgr = init_dict({data.name: data}, index, columns, dtype=dtype)
+                mgr = init_dict(
+                    {data.name: data}, index, columns, dtype=dtype, typ=manager
+                )
             else:
-                mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
+                mgr = init_ndarray(
+                    data, index, columns, dtype=dtype, copy=copy, typ=manager
+                )
 
         # For data is list-like, or Iterable (will consume into list)
         elif is_list_like(data):
@@ -610,11 +626,15 @@ def __init__(
                     arrays, columns, index = nested_data_to_arrays(
                         data, columns, index, dtype
                     )
-                    mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
+                    mgr = arrays_to_mgr(
+                        arrays, columns, index, columns, dtype=dtype, typ=manager
+                    )
                 else:
-                    mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy)
+                    mgr = init_ndarray(
+                        data, index, columns, dtype=dtype, copy=copy, typ=manager
+                    )
             else:
-                mgr = init_dict({}, index, columns, dtype=dtype)
+                mgr = init_dict({}, index, columns, dtype=dtype, typ=manager)
         # For data is scalar
         else:
             if index is None or columns is None:
@@ -631,18 +651,19 @@ def __init__(
                     construct_1d_arraylike_from_scalar(data, len(index), dtype)
                     for _ in range(len(columns))
                 ]
-                mgr = arrays_to_mgr(values, columns, index, columns, dtype=None)
+                mgr = arrays_to_mgr(
+                    values, columns, index, columns, dtype=None, typ=manager
+                )
             else:
                 values = construct_2d_arraylike_from_scalar(
                     data, len(index), len(columns), dtype, copy
                 )
 
                 mgr = init_ndarray(
-                    values, index, columns, dtype=values.dtype, copy=False
+                    values, index, columns, dtype=values.dtype, copy=False, typ=manager
                 )
 
         # ensure correct Manager type according to settings
-        manager = get_option("mode.data_manager")
         mgr = mgr_to_mgr(mgr, typ=manager)
 
         NDFrame.__init__(self, mgr)
@@ -1970,7 +1991,8 @@ def from_records(
             arr_columns = arr_columns.drop(arr_exclude)
             columns = columns.drop(exclude)
 
-        mgr = arrays_to_mgr(arrays, arr_columns, result_index, columns)
+        manager = get_option("mode.data_manager")
+        mgr = arrays_to_mgr(arrays, arr_columns, result_index, columns, typ=manager)
 
         return cls(mgr)
 
@@ -2177,6 +2199,7 @@ def _from_arrays(
         if dtype is not None:
             dtype = pandas_dtype(dtype)
 
+        manager = get_option("mode.data_manager")
         mgr = arrays_to_mgr(
             arrays,
             columns,
@@ -2184,6 +2207,7 @@ def _from_arrays(
             columns,
             dtype=dtype,
             verify_integrity=verify_integrity,
+            typ=manager,
         )
         return cls(mgr)
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 1b7c02cd7a05b..68a6b270a66f8 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -138,6 +138,7 @@
     ArrayManager,
     BlockManager,
 )
+from pandas.core.internals.construction import mgr_to_mgr
 from pandas.core.missing import find_valid_index
 from pandas.core.ops import align_method_FRAME
 from pandas.core.reshape.concat import concat
@@ -5752,6 +5753,7 @@ def _to_dict_of_blocks(self, copy: bool_t = True):
         Internal ONLY - only works for BlockManager
         """
         mgr = self._mgr
+        mgr = mgr_to_mgr(mgr, "block")
         mgr = cast(BlockManager, mgr)
         return {
             k: self._constructor(v).__finalize__(self)
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index eb1a7a355f313..221c8c1ec4a53 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -71,7 +71,9 @@
     get_objs_combined_axis,
     union_indexes,
 )
+from pandas.core.internals.array_manager import ArrayManager
 from pandas.core.internals.managers import (
+    BlockManager,
     create_block_manager_from_arrays,
     create_block_manager_from_blocks,
 )
@@ -90,6 +92,7 @@ def arrays_to_mgr(
     columns,
     dtype: Optional[DtypeObj] = None,
     verify_integrity: bool = True,
+    typ: Optional[str] = None,
 ):
     """
     Segregate Series based on type and coerce into matrices.
@@ -116,11 +119,16 @@ def arrays_to_mgr(
     # from BlockManager perspective
     axes = [columns, index]
 
-    return create_block_manager_from_arrays(arrays, arr_names, axes)
+    if typ == "block":
+        return create_block_manager_from_arrays(arrays, arr_names, axes)
+    elif typ == "array":
+        return ArrayManager(arrays, [index, columns])
+    else:
+        raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{typ}'")
 
 
 def masked_rec_array_to_mgr(
-    data: MaskedRecords, index, columns, dtype: Optional[DtypeObj], copy: bool
+    data: MaskedRecords, index, columns, dtype: Optional[DtypeObj], copy: bool, typ=None
 ):
     """
     Extract from a masked rec array and create the manager.
@@ -154,7 +162,7 @@ def masked_rec_array_to_mgr(
     if columns is None:
         columns = arr_columns
 
-    mgr = arrays_to_mgr(arrays, arr_columns, index, columns, dtype)
+    mgr = arrays_to_mgr(arrays, arr_columns, index, columns, dtype, typ=typ)
 
     if copy:
         mgr = mgr.copy()
@@ -166,11 +174,6 @@ def mgr_to_mgr(mgr, typ: str):
     Convert to specific type of Manager. Does not copy if the type is already
     correct. Does not guarantee a copy otherwise.
     """
-    from pandas.core.internals import (
-        ArrayManager,
-        BlockManager,
-    )
-
     new_mgr: Manager
 
     if typ == "block":
@@ -178,7 +181,12 @@ def mgr_to_mgr(mgr, typ: str):
             new_mgr = mgr
         else:
             new_mgr = arrays_to_mgr(
-                mgr.arrays, mgr.axes[0], mgr.axes[1], mgr.axes[0], dtype=None
+                mgr.arrays,
+                mgr.axes[0],
+                mgr.axes[1],
+                mgr.axes[0],
+                dtype=None,
+                typ="block",
             )
     elif typ == "array":
         if isinstance(mgr, ArrayManager):
@@ -187,7 +195,7 @@ def mgr_to_mgr(mgr, typ: str):
             arrays = [mgr.iget_values(i).copy() for i in range(len(mgr.axes[0]))]
             new_mgr = ArrayManager(arrays, [mgr.axes[1], mgr.axes[0]])
     else:
-        raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{type}'")
+        raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{typ}'")
     return new_mgr
 
 
@@ -195,7 +203,9 @@ def mgr_to_mgr(mgr, typ: str):
 # DataFrame Constructor Interface
 
 
-def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
+def init_ndarray(
+    values, index, columns, dtype: Optional[DtypeObj], copy: bool, typ: str
+):
     # input must be a ndarray, list, Series, index
 
     if isinstance(values, ABCSeries):
@@ -224,7 +234,7 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
         if columns is None:
             columns = Index(range(len(values)))
 
-        return arrays_to_mgr(values, columns, index, columns, dtype=dtype)
+        return arrays_to_mgr(values, columns, index, columns, dtype=dtype, typ=typ)
 
     # by definition an array here
     # the dtypes will be coerced to a single dtype
@@ -277,7 +287,7 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
     return create_block_manager_from_blocks(block_values, [columns, index])
 
 
-def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
+def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None, typ=None):
     """
     Segregate Series based on type and coerce into matrices.
     Needs to handle a lot of exceptional cases.
@@ -321,7 +331,7 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
         arrays = [
             arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
         ]
-    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
+    return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype, typ=typ)
 
 
 def nested_data_to_arrays(
@@ -415,6 +425,7 @@ def _homogenize(data, index, dtype: Optional[DtypeObj]):
                 # Forces alignment. No need to copy data since we
                 # are putting it into an ndarray later
                 val = val.reindex(index, copy=False)
+            val = extract_array(val, extract_numpy=True)
         else:
             if isinstance(val, dict):
                 if oindex is None:
diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py
index 0d36f3bd80e26..87ca5d1a8a170 100644
--- a/pandas/tests/frame/constructors/test_from_records.py
+++ b/pandas/tests/frame/constructors/test_from_records.py
@@ -184,7 +184,10 @@ def test_from_records_bad_index_column(self):
         tm.assert_index_equal(df1.index, Index(df.C))
 
         # should fail
-        msg = r"Shape of passed values is \(10, 3\), indices imply \(1, 3\)"
+        msg = (
+            r"Shape of passed values is \(10, 3\), indices imply \(1, 3\)|"
+            "Passed arrays should have the same length as the rows Index: 10 vs 1 rows"
+        )
         with pytest.raises(ValueError, match=msg):
             DataFrame.from_records(df, index=[2])
         with pytest.raises(KeyError, match=r"^2$"):
@@ -259,7 +262,10 @@ def test_from_records_to_records(self):
         tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2))
 
         # wrong length
-        msg = r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)"
+        msg = (
+            r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)|"
+            "Passed arrays should have the same length as the rows Index: 2 vs 1 rows"
+        )
         with pytest.raises(ValueError, match=msg):
             DataFrame.from_records(arr, index=index[:-1])
 

From 1d0315f6f7c8e99e07d5fda2ae06191e1ab6786d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 23 Feb 2021 14:23:00 +0100
Subject: [PATCH 02/12] clean-up signatures

---
 pandas/core/internals/construction.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 221c8c1ec4a53..a940e1935a13a 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -128,7 +128,7 @@ def arrays_to_mgr(
 
 
 def masked_rec_array_to_mgr(
-    data: MaskedRecords, index, columns, dtype: Optional[DtypeObj], copy: bool, typ=None
+    data: MaskedRecords, index, columns, dtype: Optional[DtypeObj], copy: bool, typ: str
 ):
     """
     Extract from a masked rec array and create the manager.
@@ -181,12 +181,7 @@ def mgr_to_mgr(mgr, typ: str):
             new_mgr = mgr
         else:
             new_mgr = arrays_to_mgr(
-                mgr.arrays,
-                mgr.axes[0],
-                mgr.axes[1],
-                mgr.axes[0],
-                dtype=None,
-                typ="block",
+                mgr.arrays, mgr.axes[0], mgr.axes[1], mgr.axes[0], typ="block"
             )
     elif typ == "array":
         if isinstance(mgr, ArrayManager):
@@ -287,7 +282,7 @@ def init_ndarray(
     return create_block_manager_from_blocks(block_values, [columns, index])
 
 
-def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None, typ=None):
+def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj], typ: str):
     """
     Segregate Series based on type and coerce into matrices.
     Needs to handle a lot of exceptional cases.

From ffc831446878b098672f4ed1c36d26117823a3e7 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 23 Feb 2021 14:42:17 +0100
Subject: [PATCH 03/12] 'fix' for PandasArrays

---
 pandas/core/internals/construction.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index a940e1935a13a..892ddfd335b5b 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -420,7 +420,10 @@ def _homogenize(data, index, dtype: Optional[DtypeObj]):
                 # Forces alignment. No need to copy data since we
                 # are putting it into an ndarray later
                 val = val.reindex(index, copy=False)
-            val = extract_array(val, extract_numpy=True)
+            # extract_array should be preferred? But that gives failures for
+            # `extension/test_numpy.py`
+            # val = extract_array(val, extract_numpy=True)
+            val = val._values
         else:
             if isinstance(val, dict):
                 if oindex is None:

From 46e73c80b497fc94df9c952a3f27facf7ac26286 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 23 Feb 2021 15:10:28 +0100
Subject: [PATCH 04/12] tests

---
 .github/workflows/ci.yml                      |  3 +++
 pandas/core/internals/array_manager.py        |  8 +++++-
 .../frame/constructors/test_from_records.py   |  6 +++++
 pandas/tests/frame/test_constructors.py       | 25 ++++++++++++++++---
 4 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 461363d295f6a..9240cf4d4519a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -153,6 +153,9 @@ jobs:
       run: |
         source activate pandas-dev
         pytest pandas/tests/frame/methods --array-manager
+        pytest pandas/tests/frame/test_constructors.py --array-manager
+        pytest pandas/tests/frame/constructors/ --array-manager
+
         pytest pandas/tests/arithmetic/ --array-manager
         pytest pandas/tests/reshape/merge --array-manager
 
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index d38d278e89a67..0d73acd080f7a 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -741,7 +741,13 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False
 
         value = extract_array(value, extract_numpy=True)
         if value.ndim == 2:
-            value = value[0, :]
+            if value.shape[0] == 1:
+                value = value[0, :]
+            else:
+                raise ValueError(
+                    f"expected 1D array, got array with shape {value.shape}"
+                )
+
         # TODO self.arrays can be empty
         # assert len(value) == len(self.arrays[0])
 
diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py
index 87ca5d1a8a170..98713cc690a6c 100644
--- a/pandas/tests/frame/constructors/test_from_records.py
+++ b/pandas/tests/frame/constructors/test_from_records.py
@@ -6,6 +6,7 @@
 import pytz
 
 from pandas.compat import is_platform_little_endian
+import pandas.util._test_decorators as td
 
 from pandas import (
     CategoricalIndex,
@@ -118,6 +119,8 @@ def test_from_records_sequencelike(self):
         tm.assert_series_equal(result["C"], df["C"])
         tm.assert_series_equal(result["E1"], df["E1"].astype("float64"))
 
+    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) empty from_records
+    def test_from_records_sequencelike_empty(self):
         # empty case
         result = DataFrame.from_records([], columns=["foo", "bar", "baz"])
         assert len(result) == 0
@@ -211,6 +214,7 @@ def __iter__(self):
         expected = DataFrame.from_records(tups)
         tm.assert_frame_equal(result, expected)
 
+    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) empty from_records
     def test_from_records_len0_with_columns(self):
         # GH#2633
         result = DataFrame.from_records([], index="foo", columns=["foo", "bar"])
@@ -392,6 +396,7 @@ def create_dict(order_id):
         result = DataFrame.from_records(documents, index=["order_id", "quantity"])
         assert result.index.names == ("order_id", "quantity")
 
+    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) empty from_records
     def test_from_records_misc_brokenness(self):
         # GH#2179
 
@@ -430,6 +435,7 @@ def test_from_records_misc_brokenness(self):
         )
         tm.assert_series_equal(result, expected)
 
+    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) empty from_records
     def test_from_records_empty(self):
         # GH#3562
         result = DataFrame.from_records([], columns=["a", "b", "c"])
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 14adc8a992609..ba2c6daa11137 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -18,6 +18,7 @@
 import pytz
 
 from pandas.compat import np_version_under1p19
+import pandas.util._test_decorators as td
 
 from pandas.core.dtypes.common import is_integer_dtype
 from pandas.core.dtypes.dtypes import (
@@ -159,7 +160,10 @@ def test_constructor_cast_failure(self):
         df["foo"] = np.ones((4, 2)).tolist()
 
         # this is not ok
-        msg = "Wrong number of items passed 2, placement implies 1"
+        msg = (
+            "Wrong number of items passed 2, placement implies 1"
+            "|expected 1D array, got array"
+        )
         with pytest.raises(ValueError, match=msg):
             df["test"] = np.ones((4, 2))
 
@@ -174,12 +178,15 @@ def test_constructor_dtype_copy(self):
         new_df["col1"] = 200.0
         assert orig_df["col1"][0] == 1.0
 
-    def test_constructor_dtype_nocast_view(self):
+    def test_constructor_dtype_nocast_view_dataframe(self):
         df = DataFrame([[1, 2]])
         should_be_view = DataFrame(df, dtype=df[0].dtype)
         should_be_view[0][0] = 99
         assert df.values[0, 0] == 99
 
+    @td.skip_array_manager_invalid_test  # TODO(ArrayManager) keep view on 2D array?
+    def test_constructor_dtype_nocast_view_2d_array(self):
+        df = DataFrame([[1, 2]])
         should_be_view = DataFrame(df.values, dtype=df[0].dtype)
         should_be_view[0][0] = 97
         assert df.values[0, 0] == 97
@@ -1931,6 +1938,8 @@ def test_constructor_frame_copy(self, float_frame):
         assert (cop["A"] == 5).all()
         assert not (float_frame["A"] == 5).all()
 
+    # TODO(ArrayManager) keep view on 2D array?
+    @td.skip_array_manager_not_yet_implemented
     def test_constructor_ndarray_copy(self, float_frame):
         df = DataFrame(float_frame.values)
 
@@ -1941,6 +1950,8 @@ def test_constructor_ndarray_copy(self, float_frame):
         float_frame.values[6] = 6
         assert not (df.values[6] == 6).all()
 
+    # TODO(ArrayManager) keep view on Series?
+    @td.skip_array_manager_not_yet_implemented
     def test_constructor_series_copy(self, float_frame):
         series = float_frame._series
 
@@ -2054,7 +2065,10 @@ def test_from_nested_listlike_mixed_types(self):
 
     def test_construct_from_listlikes_mismatched_lengths(self):
         # invalid (shape)
-        msg = r"Shape of passed values is \(6, 2\), indices imply \(3, 2\)"
+        msg = (
+            r"Shape of passed values is \(6, 2\), indices imply \(3, 2\)|"
+            "Passed arrays should have the same length as the rows Index"
+        )
         with pytest.raises(ValueError, match=msg):
             DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))])
 
@@ -2106,6 +2120,8 @@ def test_check_dtype_empty_numeric_column(self, dtype):
 
         assert data.b.dtype == dtype
 
+    # TODO(ArrayManager) astype to bytes dtypes does not yet give object dtype
+    @td.skip_array_manager_not_yet_implemented
     @pytest.mark.parametrize(
         "dtype", tm.STRING_DTYPES + tm.BYTES_DTYPES + tm.OBJECT_DTYPES
     )
@@ -2209,7 +2225,8 @@ class DatetimeSubclass(datetime):
     def test_with_mismatched_index_length_raises(self):
         # GH#33437
         dti = date_range("2016-01-01", periods=3, tz="US/Pacific")
-        with pytest.raises(ValueError, match="Shape of passed values"):
+        msg = "Shape of passed values|Passed arrays should have the same length"
+        with pytest.raises(ValueError, match=msg):
             DataFrame(dti, index=range(4))
 
     def test_frame_ctor_datetime64_column(self):

From 3e108df446af36b336ee5e7240a43c931bb991fa Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 23 Feb 2021 16:02:58 +0100
Subject: [PATCH 05/12] ensure datetime-like array

---
 pandas/core/internals/construction.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 892ddfd335b5b..7acbc6ad8068b 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -61,6 +61,7 @@
 )
 from pandas.core.arrays import Categorical
 from pandas.core.construction import (
+    ensure_wrapped_if_datetimelike,
     extract_array,
     sanitize_array,
 )
@@ -110,7 +111,8 @@ def arrays_to_mgr(
 
         # don't force copy because getting jammed in an ndarray anyway
         arrays = _homogenize(arrays, index, dtype)
-
+        if typ == "array":
+            arrays = [ensure_wrapped_if_datetimelike(arr) for arr in arrays]
         columns = ensure_index(columns)
     else:
         columns = ensure_index(columns)

From 8726d421ceb7c858ec309534140976d4035ce560 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 24 Feb 2021 14:29:21 +0100
Subject: [PATCH 06/12] small clean-up - additional comments

---
 pandas/core/frame.py                    | 2 ++
 pandas/core/generic.py                  | 1 +
 pandas/core/internals/array_manager.py  | 2 +-
 pandas/core/internals/construction.py   | 5 +++--
 pandas/tests/frame/test_constructors.py | 2 +-
 5 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index fffb99cf1d5d6..8acfcc9ad70b3 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -564,6 +564,8 @@ def __init__(
         if isinstance(data, DataFrame):
             data = data._mgr
 
+        # first check if a Manager is passed without any other arguments
+        # -> use fastpath (without checking Manager type)
         if (
             index is None
             and columns is None
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 68a6b270a66f8..5b83ada85d3f6 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5753,6 +5753,7 @@ def _to_dict_of_blocks(self, copy: bool_t = True):
         Internal ONLY - only works for BlockManager
         """
         mgr = self._mgr
+        # convert to BlockManager if needed -> this way support ArrayManager as well
         mgr = mgr_to_mgr(mgr, "block")
         mgr = cast(BlockManager, mgr)
         return {
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 1c2267a0ca691..8bb57ff0ae9a0 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -745,7 +745,7 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False
                 value = value[0, :]
             else:
                 raise ValueError(
-                    f"expected 1D array, got array with shape {value.shape}"
+                    f"Expected a 1D array, got an array with shape {value.shape}"
                 )
 
         # TODO self.arrays can be empty
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 7acbc6ad8068b..cd9c370165f8a 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -422,8 +422,9 @@ def _homogenize(data, index, dtype: Optional[DtypeObj]):
                 # Forces alignment. No need to copy data since we
                 # are putting it into an ndarray later
                 val = val.reindex(index, copy=False)
-            # extract_array should be preferred? But that gives failures for
-            # `extension/test_numpy.py`
+            # TODO extract_array should be preferred, but that gives failures for
+            # `extension/test_numpy.py` (extract_array will convert numpy arrays
+            # to PandasArray)
             # val = extract_array(val, extract_numpy=True)
             val = val._values
         else:
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index ba2c6daa11137..95c808326235b 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -162,7 +162,7 @@ def test_constructor_cast_failure(self):
         # this is not ok
         msg = (
             "Wrong number of items passed 2, placement implies 1"
-            "|expected 1D array, got array"
+            "|Expected a 1D array, got an array with shape \\(4, 2\\)"
         )
         with pytest.raises(ValueError, match=msg):
             df["test"] = np.ones((4, 2))

From 6e171838fd73366b9f472ad257801d45e155a7ab Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 24 Feb 2021 16:45:01 +0100
Subject: [PATCH 07/12] use string join for msg

---
 .../frame/constructors/test_from_records.py      | 16 ++++++++++------
 pandas/tests/frame/test_constructors.py          | 16 ++++++++++------
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py
index 98713cc690a6c..3ead6b722713c 100644
--- a/pandas/tests/frame/constructors/test_from_records.py
+++ b/pandas/tests/frame/constructors/test_from_records.py
@@ -187,9 +187,11 @@ def test_from_records_bad_index_column(self):
         tm.assert_index_equal(df1.index, Index(df.C))
 
         # should fail
-        msg = (
-            r"Shape of passed values is \(10, 3\), indices imply \(1, 3\)|"
-            "Passed arrays should have the same length as the rows Index: 10 vs 1 rows"
+        msg = "|".join(
+            [
+                r"Shape of passed values is \(10, 3\), indices imply \(1, 3\)",
+                "Passed arrays should have the same length as the rows Index: 10 vs 1",
+            ]
         )
         with pytest.raises(ValueError, match=msg):
             DataFrame.from_records(df, index=[2])
@@ -266,9 +268,11 @@ def test_from_records_to_records(self):
         tm.assert_frame_equal(DataFrame.from_records(arr2), DataFrame(arr2))
 
         # wrong length
-        msg = (
-            r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)|"
-            "Passed arrays should have the same length as the rows Index: 2 vs 1 rows"
+        msg = "|".join(
+            [
+                r"Shape of passed values is \(2, 3\), indices imply \(1, 3\)",
+                "Passed arrays should have the same length as the rows Index: 2 vs 1",
+            ]
         )
         with pytest.raises(ValueError, match=msg):
             DataFrame.from_records(arr, index=index[:-1])
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 95c808326235b..19dd85d3eeeb6 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -160,9 +160,11 @@ def test_constructor_cast_failure(self):
         df["foo"] = np.ones((4, 2)).tolist()
 
         # this is not ok
-        msg = (
-            "Wrong number of items passed 2, placement implies 1"
-            "|Expected a 1D array, got an array with shape \\(4, 2\\)"
+        msg = "|".join(
+            [
+                "Wrong number of items passed 2, placement implies 1",
+                "Expected a 1D array, got an array with shape \\(4, 2\\)",
+            ]
         )
         with pytest.raises(ValueError, match=msg):
             df["test"] = np.ones((4, 2))
@@ -2065,9 +2067,11 @@ def test_from_nested_listlike_mixed_types(self):
 
     def test_construct_from_listlikes_mismatched_lengths(self):
         # invalid (shape)
-        msg = (
-            r"Shape of passed values is \(6, 2\), indices imply \(3, 2\)|"
-            "Passed arrays should have the same length as the rows Index"
+        msg = "|".join(
+            [
+                r"Shape of passed values is \(6, 2\), indices imply \(3, 2\)",
+                "Passed arrays should have the same length as the rows Index",
+            ]
         )
         with pytest.raises(ValueError, match=msg):
             DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))])

From 8096665343e3aea8476ec5148ba45228936c27be Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 24 Feb 2021 16:56:36 +0100
Subject: [PATCH 08/12] add github issue link to comment

---
 pandas/core/internals/construction.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index cd9c370165f8a..974e3a961db8e 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -424,7 +424,7 @@ def _homogenize(data, index, dtype: Optional[DtypeObj]):
                 val = val.reindex(index, copy=False)
             # TODO extract_array should be preferred, but that gives failures for
             # `extension/test_numpy.py` (extract_array will convert numpy arrays
-            # to PandasArray)
+            # to PandasArray), see https://github.com/pandas-dev/pandas/issues/40021
             # val = extract_array(val, extract_numpy=True)
             val = val._values
         else:

From 54d36ab4976831de3222c1ba7cc5bccc9bc24a72 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 1 Mar 2021 10:19:52 +0100
Subject: [PATCH 09/12] move wrapping inside ArrayManager constructor

---
 pandas/core/internals/array_manager.py | 1 +
 pandas/core/internals/construction.py  | 4 +---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index b9638f199c927..836c85c106d42 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -113,6 +113,7 @@ def __init__(
 
         if verify_integrity:
             self._axes = [ensure_index(ax) for ax in axes]
+            self.arrays = [ensure_wrapped_if_datetimelike(arr) for arr in arrays]
             self._verify_integrity()
 
     def make_empty(self: T, axes=None) -> T:
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 618ed4830260f..296d0dbd50cf6 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -59,7 +59,6 @@
 )
 from pandas.core.arrays import Categorical
 from pandas.core.construction import (
-    ensure_wrapped_if_datetimelike,
     extract_array,
     sanitize_array,
 )
@@ -109,8 +108,7 @@ def arrays_to_mgr(
 
         # don't force copy because getting jammed in an ndarray anyway
         arrays = _homogenize(arrays, index, dtype)
-        if typ == "array":
-            arrays = [ensure_wrapped_if_datetimelike(arr) for arr in arrays]
+
         columns = ensure_index(columns)
     else:
         columns = ensure_index(columns)

From c56ffa892751349b6e053c60c1c76406e46263bf Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 1 Mar 2021 10:27:36 +0100
Subject: [PATCH 10/12] remove skip

---
 pandas/tests/groupby/test_groupby.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index b270539921c9c..8cbb9d2443cb2 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1196,7 +1196,6 @@ def convert_force_pure(x):
     assert isinstance(result[0], Decimal)
 
 
-@td.skip_array_manager_not_yet_implemented
 def test_groupby_dtype_inference_empty():
     # GH 6733
     df = DataFrame({"x": [], "range": np.arange(0, dtype="int64")})

From 164387c8894a0b9c3aa7d98668e2faa77cdcee2c Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 1 Mar 2021 13:24:52 +0100
Subject: [PATCH 11/12] trigger ci


From 143b57238b72b75758bcbdfa8236db153bacd653 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 1 Mar 2021 14:03:36 +0100
Subject: [PATCH 12/12] add skip for rename copy

---
 pandas/tests/frame/methods/test_rename.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py
index 677d862dfe077..462d588aff58f 100644
--- a/pandas/tests/frame/methods/test_rename.py
+++ b/pandas/tests/frame/methods/test_rename.py
@@ -170,6 +170,7 @@ def test_rename_multiindex(self):
         renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0)
         tm.assert_index_equal(renamed.index, new_index)
 
+    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) setitem copy/view
     def test_rename_nocopy(self, float_frame):
         renamed = float_frame.rename(columns={"C": "foo"}, copy=False)
         renamed["foo"] = 1.0