From cbc97f0e7ac49f5a78eb982f933281dc879a7e2a Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 5 Oct 2020 10:45:28 -0700 Subject: [PATCH 1/2] ENH: allow non-consolidation in constructors --- pandas/core/frame.py | 82 +++++++++++++++++++++++---- pandas/core/internals/construction.py | 42 +++++++++++--- pandas/core/internals/managers.py | 17 ++++-- pandas/core/ops/__init__.py | 2 +- 4 files changed, 118 insertions(+), 25 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1f9987d9d3f5b..4af6097a51f73 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -361,6 +361,8 @@ class DataFrame(NDFrame): Data type to force. Only a single dtype is allowed. If None, infer. copy : bool, default False Copy data from inputs. Only affects DataFrame / 2d ndarray input. + consolidate : bool or None, default None + Whether to consolidate the arrays in the new DataFrame. See Also -------- @@ -437,12 +439,16 @@ def __init__( columns: Optional[Axes] = None, dtype: Optional[Dtype] = None, copy: bool = False, + consolidate=None, ): if data is None: data = {} if dtype is not None: dtype = self._validate_dtype(dtype) + if consolidate is None: + consolidate = not copy + if isinstance(data, DataFrame): data = data._mgr @@ -457,7 +463,7 @@ def __init__( ) elif isinstance(data, dict): - mgr = init_dict(data, index, columns, dtype=dtype) + mgr = init_dict(data, index, columns, dtype=dtype, consolidate=consolidate) elif isinstance(data, ma.MaskedArray): import numpy.ma.mrecords as mrecords @@ -474,7 +480,14 @@ def __init__( data[mask] = fill_value else: data = data.copy() - mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) + mgr = init_ndarray( + data, + index, + columns, + dtype=dtype, + copy=copy, + consolidate=consolidate, + ) elif isinstance(data, (np.ndarray, Series, Index)): if data.dtype.names: @@ -482,11 +495,26 @@ def __init__( data = {k: data[k] for k in data_columns} if columns is None: columns = data_columns - mgr = init_dict(data, index, columns, dtype=dtype) + mgr = init_dict( + data, index, columns, dtype=dtype, consolidate=consolidate + ) elif getattr(data, "name", None) is not None: - mgr = init_dict({data.name: data}, index, columns, dtype=dtype) + mgr = init_dict( + {data.name: data}, + index, + columns, + dtype=dtype, + consolidate=consolidate, + ) else: - mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) + mgr = init_ndarray( + data, + index, + columns, + dtype=dtype, + copy=copy, + consolidate=consolidate, + ) # For data is list-like, or Iterable (will consume into list) elif isinstance(data, abc.Iterable) and not isinstance(data, (str, bytes)): @@ -510,11 +538,27 @@ def __init__( else: index = ibase.default_index(len(data)) - mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype) + mgr = arrays_to_mgr( + arrays, + columns, + index, + columns, + dtype=dtype, + consolidate=consolidate, + ) else: - mgr = init_ndarray(data, index, columns, dtype=dtype, copy=copy) + mgr = init_ndarray( + data, + index, + columns, + dtype=dtype, + copy=copy, + consolidate=consolidate, + ) else: - mgr = init_dict({}, index, columns, dtype=dtype) + mgr = init_dict( + {}, index, columns, dtype=dtype, consolidate=consolidate + ) # For data is scalar else: if index is None or columns is None: @@ -530,7 +574,9 @@ def __init__( construct_1d_arraylike_from_scalar(data, len(index), dtype) for _ in range(len(columns)) ] - mgr = arrays_to_mgr(values, columns, index, columns, dtype=None) + mgr = arrays_to_mgr( + values, columns, index, columns, dtype=None, 
consolidate=consolidate + ) else: # Attempt to coerce to a numpy array try: @@ -550,7 +596,12 @@ def __init__( ) mgr = init_ndarray( - values, index, columns, dtype=values.dtype, copy=False + values, + index, + columns, + dtype=values.dtype, + copy=False, + consolidate=consolidate, ) NDFrame.__init__(self, mgr) @@ -1665,6 +1716,7 @@ def from_records( columns=None, coerce_float=False, nrows=None, + consolidate: bool = True, ) -> DataFrame: """ Convert structured or record ndarray to DataFrame. @@ -1692,6 +1744,8 @@ def from_records( decimal.Decimal) to floating point, useful for SQL result sets. nrows : int, default None Number of rows to read if data is an iterator. + consolidate: bool, default True + Whether to consolidate the arrays in the new DataFrame. Returns ------- @@ -1827,7 +1881,9 @@ def from_records( arr_columns = arr_columns.drop(arr_exclude) columns = columns.drop(exclude) - mgr = arrays_to_mgr(arrays, arr_columns, result_index, columns) + mgr = arrays_to_mgr( + arrays, arr_columns, result_index, columns, consolidate=consolidate + ) return cls(mgr) @@ -2006,6 +2062,7 @@ def _from_arrays( index, dtype: Optional[Dtype] = None, verify_integrity: bool = True, + consolidate: bool = True, ) -> DataFrame: """ Create DataFrame from a list of arrays corresponding to the columns. @@ -2026,6 +2083,8 @@ def _from_arrays( stored in a block (numpy ndarray or ExtensionArray), have the same length as and are aligned with the index, and that `columns` and `index` are ensured to be an Index object. + consolidate: bool, default True + Whether to consolidate the passed arrays in the new DataFrame. Returns ------- @@ -2041,6 +2100,7 @@ def _from_arrays( columns, dtype=dtype, verify_integrity=verify_integrity, + consolidate=consolidate, ) return cls(mgr) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 6244f1bf0a2d2..618e06ad30d8a 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -65,6 +65,7 @@ def arrays_to_mgr( columns, dtype: Optional[DtypeObj] = None, verify_integrity: bool = True, + consolidate: bool = True, ): """ Segregate Series based on type and coerce into matrices. 
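The patch threads a single ``consolidate`` flag from the public constructors (``DataFrame.__init__``, ``DataFrame.from_records``, ``DataFrame._from_arrays``) down into the internals. In ``__init__`` the keyword defaults to None and is resolved to ``not copy``, so plain construction still consolidates eagerly, while ``copy=True`` or an explicit ``consolidate=False`` skips that step. A minimal usage sketch, assuming this patch is applied (the keyword does not exist in any released pandas) and touching the private ``_mgr`` attribute only for inspection:

    import numpy as np
    import pandas as pd

    # A 2D object-dtype array goes through init_ndarray, which builds one
    # internal block per column before calling the block-manager constructor.
    arr = np.array([["x", 1], ["y", 2]], dtype=object)

    df = pd.DataFrame(arr)
    print(df._mgr.nblocks)  # 1 -- same-dtype blocks are consolidated by default

    # Hypothetical keyword from this patch: skipping _consolidate_inplace()
    # should leave the per-column blocks separate.
    df_nc = pd.DataFrame(arr, consolidate=False)
    print(df_nc._mgr.nblocks)

    # consolidate=None resolves to ``not copy``, so under this patch copy=True
    # also implies no eager consolidation.
    df_copy = pd.DataFrame(arr, copy=True)
    print(df_copy._mgr.is_consolidated())
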
@@ -91,7 +92,9 @@ def arrays_to_mgr( # from BlockManager perspective axes = [columns, index] - return create_block_manager_from_arrays(arrays, arr_names, axes) + return create_block_manager_from_arrays( + arrays, arr_names, axes, consolidate=consolidate + ) def masked_rec_array_to_mgr( @@ -130,7 +133,9 @@ def masked_rec_array_to_mgr( if columns is None: columns = arr_columns - mgr = arrays_to_mgr(arrays, arr_columns, index, columns, dtype) + mgr = arrays_to_mgr( + arrays, arr_columns, index, columns, dtype, consolidate=True + ) # FIXME: dont hardcode if copy: mgr = mgr.copy() @@ -141,7 +146,14 @@ def masked_rec_array_to_mgr( # DataFrame Constructor Interface -def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool): +def init_ndarray( + values, + index, + columns, + dtype: Optional[DtypeObj], + copy: bool, + consolidate: bool = True, +): # input must be a ndarray, list, Series, index if isinstance(values, ABCSeries): @@ -170,7 +182,9 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool): values = values.copy() index, columns = _get_axes(len(values), 1, index, columns) - return arrays_to_mgr([values], columns, index, columns, dtype=dtype) + return arrays_to_mgr( + [values], columns, index, columns, dtype=dtype, consolidate=consolidate + ) elif is_extension_array_dtype(values) or is_extension_array_dtype(dtype): # GH#19157 @@ -184,7 +198,9 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool): if columns is None: columns = Index(range(len(values))) - return arrays_to_mgr(values, columns, index, columns, dtype=dtype) + return arrays_to_mgr( + values, columns, index, columns, dtype=dtype, consolidate=consolidate + ) # by definition an array here # the dtypes will be coerced to a single dtype @@ -233,10 +249,18 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool): else: block_values = [values] - return create_block_manager_from_blocks(block_values, [columns, index]) + return create_block_manager_from_blocks( + block_values, [columns, index], consolidate=consolidate + ) -def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None): +def init_dict( + data: Dict, + index, + columns, + dtype: Optional[DtypeObj] = None, + consolidate: bool = True, +): """ Segregate Series based on type and coerce into matrices. Needs to handle a lot of exceptional cases. 
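These construction helpers only forward the flag; the consolidation they can now opt out of happens in the block-manager constructors changed further below. For context, a short illustration of what that step does, written against pandas' private internals API as it exists before this patch (a sketch for exposition, not stable public API):

    import numpy as np
    from pandas import Index
    from pandas.core.internals import make_block
    from pandas.core.internals.managers import (
        BlockManager,
        create_block_manager_from_blocks,
    )

    idx = Index(range(3))
    cols = Index(["a", "b"])
    # Two single-column blocks sharing the same dtype.
    blocks = [
        make_block(np.arange(3, dtype="int64").reshape(1, 3), placement=[0]),
        make_block(np.arange(3, 6, dtype="int64").reshape(1, 3), placement=[1]),
    ]

    raw = BlockManager(blocks, [cols, idx])
    print(raw.nblocks)  # 2 -- nothing has been consolidated yet

    mgr = create_block_manager_from_blocks(blocks, [cols, idx])
    print(mgr.nblocks)  # 1 -- the helper merges same-dtype blocks via _consolidate_inplace()

    # With this patch, create_block_manager_from_blocks(..., consolidate=False)
    # would skip _consolidate_inplace() and keep the two blocks separate.
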
@@ -282,7 +306,9 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None): arrays = [ arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays ] - return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype) + return arrays_to_mgr( + arrays, data_names, index, columns, dtype=dtype, consolidate=consolidate + ) # --------------------------------------------------------------------- diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f2480adce89b4..2a08e2dcbe136 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1595,7 +1595,9 @@ def fast_xs(self, loc): # Constructor Helpers -def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager: +def create_block_manager_from_blocks( + blocks, axes: List[Index], consolidate: bool = True +) -> BlockManager: try: if len(blocks) == 1 and not isinstance(blocks[0], Block): # if blocks[0] is of length 0, return empty blocks @@ -1610,7 +1612,8 @@ def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager: ] mgr = BlockManager(blocks, axes) - mgr._consolidate_inplace() + if consolidate: + mgr._consolidate_inplace() return mgr except ValueError as e: @@ -1620,7 +1623,10 @@ def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager: def create_block_manager_from_arrays( - arrays, names: Index, axes: List[Index] + arrays, + names: Index, + axes: List[Index], + consolidate: bool = True, ) -> BlockManager: assert isinstance(names, Index) assert isinstance(axes, list) @@ -1629,10 +1635,11 @@ def create_block_manager_from_arrays( try: blocks = form_blocks(arrays, names, axes) mgr = BlockManager(blocks, axes) - mgr._consolidate_inplace() - return mgr except ValueError as e: raise construction_error(len(arrays), arrays[0].shape, axes, e) + if consolidate: + mgr._consolidate_inplace() + return mgr def construction_error(tot_items, block_shape, axes, e=None): diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 36e3a0e37c1ae..c8a968e61983f 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -272,7 +272,7 @@ def dispatch_to_series(left, right, func, axis: Optional[int] = None): raise NotImplementedError(right) return type(left)._from_arrays( - arrays, left.columns, left.index, verify_integrity=False + arrays, left.columns, left.index, verify_integrity=False, consolidate=False ) From 5c94129f69f33e40e9edfcd58e3f980aa42b6360 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 5 Oct 2020 15:00:42 -0700 Subject: [PATCH 2/2] mypy fixup --- pandas/tests/arithmetic/conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index 6286711ac6113..c808644e007b0 100644 --- a/pandas/tests/arithmetic/conftest.py +++ b/pandas/tests/arithmetic/conftest.py @@ -221,7 +221,9 @@ def mismatched_freq(request): # ------------------------------------------------------------------ -@pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame], ids=id_func) +@pytest.fixture( + params=[pd.Index, pd.Series, pd.DataFrame], ids=id_func # type: ignore[list-item] +) def box(request): """ Several array-like containers that should have effectively identical