From 1697252a73785bb4ad1bfff82304d5c37534897f Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 17 Mar 2020 09:59:21 -0700
Subject: [PATCH 01/31] PERF: block-wise arithmetic for frame-with-frame

---
 pandas/core/arrays/datetimelike.py         |  2 +-
 pandas/core/ops/__init__.py                | 72 +++++++++++++++++++++-
 pandas/core/ops/array_ops.py               |  2 +-
 pandas/tests/arithmetic/common.py          |  9 ++-
 pandas/tests/arithmetic/test_datetime64.py |  7 ++-
 5 files changed, 83 insertions(+), 9 deletions(-)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 105d9581b1a25..46f2c239b4193 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1299,7 +1299,7 @@ def _addsub_object_array(self, other: np.ndarray, op):
         result : same class as self
         """
         assert op in [operator.add, operator.sub]
-        if len(other) == 1:
+        if len(other) == 1 and self.ndim == other.ndim == 1:
             return op(self, other[0])
 
         warnings.warn(
diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index 3153a9ac28c10..f80ad80e15d7b 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -5,7 +5,7 @@
 """
 import datetime
 import operator
-from typing import TYPE_CHECKING, Optional, Set, Tuple
+from typing import TYPE_CHECKING, List, Optional, Set, Tuple
 
 import numpy as np
 
@@ -58,6 +58,7 @@
 
 if TYPE_CHECKING:
     from pandas import DataFrame  # noqa:F401
+    from pandas.core.internals.blocks import Block  # noqa: F401
 
 # -----------------------------------------------------------------------------
 # constants
@@ -353,6 +354,70 @@ def fill_binop(left, right, fill_value):
 # Dispatch logic
 
 
+def operate_blockwise(left, right, array_op):
+    assert right._indexed_same(left)
+
+    res_blks: List["Block"] = []
+    rmgr = right._data
+    for n, blk in enumerate(left._data.blocks):
+        locs = blk.mgr_locs
+
+        blk_vals = blk.values
+
+        if not isinstance(blk_vals, np.ndarray):
+            # 1D EA
+            assert len(locs) == 1, locs
+            rser = right.iloc[:, locs[0]]
+            rvals = extract_array(rser, extract_numpy=True)
+            res_values = array_op(blk_vals, rvals)
+            nbs = blk._split_op_result(res_values)
+            res_blks.extend(nbs)
+            continue
+
+        rblks = rmgr._slice_take_blocks_ax0(locs.indexer)
+
+        for k, rblk in enumerate(rblks):
+            lvals = blk_vals[rblk.mgr_locs.indexer, :]
+            rvals = rblk.values
+
+            if not isinstance(rvals, np.ndarray):
+                # 1D EA
+                assert lvals.shape[0] == 1, lvals.shape
+                lvals = lvals[0, :]
+                res_values = array_op(lvals, rvals)
+                nbs = rblk._split_op_result(res_values)
+                assert len(nbs) == 1
+                nb = nbs[0]
+                nb.mgr_locs = locs.as_array[nb.mgr_locs]
+                res_blks.append(nb)
+                continue
+
+            assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
+
+            res_values = array_op(lvals, rvals)
+            assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape)
+            nbs = rblk._split_op_result(res_values)
+            for nb in nbs:
+                # TODO: maybe optimize by sticking with slices?
+                nb_mgr_locs = nb.mgr_locs
+                nblocs = locs.as_array[nb_mgr_locs.indexer]
+                nb.mgr_locs = nblocs
+                assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape)
+                assert all(x in locs.as_array for x in nb.mgr_locs.as_array)
+
+            res_blks.extend(nbs)
+
+    slocs = set(y for nb in res_blks for y in nb.mgr_locs.as_array)
+    nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks)
+    assert nlocs == len(left.columns), (nlocs, len(left.columns))
+    assert len(slocs) == nlocs, (len(slocs), nlocs)
+    assert slocs == set(range(nlocs)), slocs
+
+    # TODO: once this is working, pass do_integrity_check=False
+    new_mgr = type(rmgr)(res_blks, axes=rmgr.axes)
+    return new_mgr
+
+
 def dispatch_to_series(left, right, func, str_rep=None, axis=None):
     """
     Evaluate the frame operation func(left, right) by evaluating
@@ -385,8 +450,9 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None):
     elif isinstance(right, ABCDataFrame):
         assert right._indexed_same(left)
 
-        def column_op(a, b):
-            return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))}
+        array_op = get_array_op(func, str_rep=str_rep)
+        bm = operate_blockwise(left, right, array_op)
+        return type(left)(bm)
 
     elif isinstance(right, ABCSeries) and axis == "columns":
         # We only get here if called via _combine_series_frame,
diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
index e285c53d9813e..dcef6d8f3c981 100644
--- a/pandas/core/ops/array_ops.py
+++ b/pandas/core/ops/array_ops.py
@@ -132,7 +132,7 @@ def masked_arith_op(x: np.ndarray, y, op):
     return result
 
 
-def define_na_arithmetic_op(op, str_rep: str):
+def define_na_arithmetic_op(op, str_rep: Optional[str]):
     def na_op(x, y):
         return na_arithmetic_op(x, y, op, str_rep)
 
diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py
index ccc49adc5da82..755fbd0d9036c 100644
--- a/pandas/tests/arithmetic/common.py
+++ b/pandas/tests/arithmetic/common.py
@@ -70,7 +70,14 @@ def assert_invalid_comparison(left, right, box):
     result = right != left
     tm.assert_equal(result, ~expected)
 
-    msg = "Invalid comparison between|Cannot compare type|not supported between"
+    msg = "|".join(
+        [
+            "Invalid comparison between",
+            "Cannot compare type",
+            "not supported between",
+            "invalid type promotion",
+        ]
+    )
     with pytest.raises(TypeError, match=msg):
         left < right
     with pytest.raises(TypeError, match=msg):
diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
index f7211ab5f9fd4..5cadf8bff51f1 100644
--- a/pandas/tests/arithmetic/test_datetime64.py
+++ b/pandas/tests/arithmetic/test_datetime64.py
@@ -964,7 +964,9 @@ def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture):
         obj = tm.box_expected(dti, box_with_array)
         expected = tm.box_expected(expected, box_with_array)
 
-        warn = PerformanceWarning if box_with_array is not pd.DataFrame else None
+        warn = None
+        if box_with_array is not pd.DataFrame or tz_naive_fixture is None:
+            warn = PerformanceWarning
         with tm.assert_produces_warning(warn):
             result = obj - obj.astype(object)
         tm.assert_equal(result, expected)
@@ -1388,8 +1390,7 @@ def test_dt64arr_add_mixed_offset_array(self, box_with_array):
         s = DatetimeIndex([Timestamp("2000-1-1"), Timestamp("2000-2-1")])
         s = tm.box_expected(s, box_with_array)
 
-        warn = None if box_with_array is pd.DataFrame else PerformanceWarning
-        with tm.assert_produces_warning(warn):
+        with tm.assert_produces_warning(PerformanceWarning):
             other = pd.Index([pd.offsets.DateOffset(years=1), pd.offsets.MonthEnd()])
             other = tm.box_expected(other, box_with_array)
             result = s + other

From 30a836d6ce7a7ef0c9f01daca99cb182bebbbcfa Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 17 Mar 2020 10:46:22 -0700
Subject: [PATCH 02/31] lint fixup

---
 pandas/core/ops/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index f80ad80e15d7b..e127be3b7d644 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -407,7 +407,7 @@ def operate_blockwise(left, right, array_op):
 
             res_blks.extend(nbs)
 
-    slocs = set(y for nb in res_blks for y in nb.mgr_locs.as_array)
+    slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array}
     nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks)
     assert nlocs == len(left.columns), (nlocs, len(left.columns))
     assert len(slocs) == nlocs, (len(slocs), nlocs)

From 4334353b53ead13ffb777cb869a17e18687be4c2 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 17 Mar 2020 19:14:02 -0700
Subject: [PATCH 03/31] troubleshoot npdev build

---
 pandas/core/ops/array_ops.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
index dcef6d8f3c981..4338746f05386 100644
--- a/pandas/core/ops/array_ops.py
+++ b/pandas/core/ops/array_ops.py
@@ -5,6 +5,7 @@
 from functools import partial
 import operator
 from typing import Any, Optional
+import warnings
 
 import numpy as np
 
@@ -163,15 +164,18 @@ def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = Fal
     """
     import pandas.core.computation.expressions as expressions
 
-    try:
-        result = expressions.evaluate(op, str_rep, left, right)
-    except TypeError:
-        if is_cmp:
-            # numexpr failed on comparison op, e.g. ndarray[float] > datetime
-            #  In this case we do not fall back to the masked op, as that
-            #  will handle complex numbers incorrectly, see GH#32047
-            raise
-        result = masked_arith_op(left, right, op)
+    with warnings.catch_warnings():
+        # suppress warnings from numpy about element-wise comparison
+        warnings.simplefilter("ignore", DeprecationWarning)
+        try:
+            result = expressions.evaluate(op, str_rep, left, right)
+        except TypeError:
+            if is_cmp:
+                # numexpr failed on comparison op, e.g. ndarray[float] > datetime
+                #  In this case we do not fall back to the masked op, as that
+                #  will handle complex numbers incorrectly, see GH#32047
+                raise
+            result = masked_arith_op(left, right, op)
 
     if is_cmp and (is_scalar(result) or result is NotImplemented):
         # numpy returned a scalar instead of operating element-wise

From 713a776e551186e9e2e4d480f362072b60042f65 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 24 Mar 2020 19:25:57 -0700
Subject: [PATCH 04/31] comment

---
 pandas/core/arrays/datetimelike.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 6ca45f9acec91..2fdb33b7b8a8f 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1297,10 +1297,11 @@ def _addsub_object_array(self, other: np.ndarray, op):
         """
         assert op in [operator.add, operator.sub]
         if len(other) == 1 and self.ndim == other.ndim == 1:
+            # If both 1D then broadcasting is unambiguous
             return op(self, other[0])
 
         warnings.warn(
-            "Adding/subtracting array of DateOffsets to "
+            "Adding/subtracting object-dtype array to "
             f"{type(self).__name__} not vectorized",
             PerformanceWarning,
         )
@@ -1308,7 +1309,7 @@ def _addsub_object_array(self, other: np.ndarray, op):
         # Caller is responsible for broadcasting if necessary
         assert self.shape == other.shape, (self.shape, other.shape)
 
-        res_values = op(self.astype("O"), np.array(other))
+        res_values = op(self.astype("O"), np.asarray(other))
         result = array(res_values.ravel())
         result = extract_array(result, extract_numpy=True).reshape(self.shape)
         return result

From 95ef3adddd6554ff1aa21ba3304cc9b175f6c49b Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 24 Mar 2020 20:45:20 -0700
Subject: [PATCH 05/31] checkpoint passing

---
 pandas/core/ops/__init__.py | 51 +++++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 16 deletions(-)

diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index caf420f06a7fb..4fdd18418f423 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -304,43 +304,62 @@ def operate_blockwise(left, right, array_op):
 
         blk_vals = blk.values
 
+        left_ea = not isinstance(blk_vals, np.ndarray)
+
         if not isinstance(blk_vals, np.ndarray):
             # 1D EA
             assert len(locs) == 1, locs
-            rser = right.iloc[:, locs[0]]
-            rvals = extract_array(rser, extract_numpy=True)
+            rblks = rmgr._slice_take_blocks_ax0(locs.indexer)
+            assert len(rblks) == 1, rblks
+            rblk = rblks[0]
+            assert rblk.shape[0] == 1, rblk.shape
+
+            rvals = rblk.values
+            right_ea = not isinstance(rvals, np.ndarray)
+            if not right_ea:
+                assert rvals.shape[0] == 1, rvals.shape
+                rvals = rvals[0, :]
+            #rser = right.iloc[:, locs[0]]
+            #rvals = extract_array(rser, extract_numpy=True)
             res_values = array_op(blk_vals, rvals)
             nbs = blk._split_op_result(res_values)
+            # Setting nb.mgr_locs is unnecessary here, but harmless
             res_blks.extend(nbs)
             continue
 
         rblks = rmgr._slice_take_blocks_ax0(locs.indexer)
 
         for k, rblk in enumerate(rblks):
-            lvals = blk_vals[rblk.mgr_locs.indexer, :]
             rvals = rblk.values
+            right_ea = not isinstance(rvals, np.ndarray)
 
-            if not isinstance(rvals, np.ndarray):
+            lvals = blk_vals[rblk.mgr_locs.indexer, :]
+
+            if not (left_ea or right_ea):
+                assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
+            #elif left_ea and right_ea:
+            #    assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
+            elif right_ea:
                 # 1D EA
                 assert lvals.shape[0] == 1, lvals.shape
                 lvals = lvals[0, :]
-                res_values = array_op(lvals, rvals)
-                nbs = rblk._split_op_result(res_values)
-                assert len(nbs) == 1
-                nb = nbs[0]
-                nb.mgr_locs = locs.as_array[nb.mgr_locs]
-                res_blks.append(nb)
-                continue
-
-            assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
+            else:
+                assert False  # should be unreachable ATM
+                #assert rvals.shape[0] == 1, rvals.shape
+                #rvals = rvals[0, :]
 
             res_values = array_op(lvals, rvals)
-            assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape)
             nbs = rblk._split_op_result(res_values)
+
+            # Debugging assertions
+            if right_ea:  # or left_ea
+                assert len(nbs) == 1
+            else:
+                assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape)
+
             for nb in nbs:
                 # TODO: maybe optimize by sticking with slices?
-                nb_mgr_locs = nb.mgr_locs
-                nblocs = locs.as_array[nb_mgr_locs.indexer]
+                nblocs = locs.as_array[nb.mgr_locs.indexer]
                 nb.mgr_locs = nblocs
                 assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape)
                 assert all(x in locs.as_array for x in nb.mgr_locs.as_array)

From 61e5cd6260d3a94f195fa5466a6c9cc0a6bd7607 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 25 Mar 2020 09:29:34 -0700
Subject: [PATCH 06/31] checkpoint passing

---
 pandas/core/ops/__init__.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index 4fdd18418f423..687d1b8bb7891 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -306,7 +306,7 @@ def operate_blockwise(left, right, array_op):
 
         left_ea = not isinstance(blk_vals, np.ndarray)
 
-        if not isinstance(blk_vals, np.ndarray):
+        if False:#left_ea:
             # 1D EA
             assert len(locs) == 1, locs
             rblks = rmgr._slice_take_blocks_ax0(locs.indexer)
@@ -319,8 +319,6 @@ def operate_blockwise(left, right, array_op):
             if not right_ea:
                 assert rvals.shape[0] == 1, rvals.shape
                 rvals = rvals[0, :]
-            #rser = right.iloc[:, locs[0]]
-            #rvals = extract_array(rser, extract_numpy=True)
             res_values = array_op(blk_vals, rvals)
             nbs = blk._split_op_result(res_values)
             # Setting nb.mgr_locs is unnecessary here, but harmless
@@ -329,30 +327,38 @@ def operate_blockwise(left, right, array_op):
 
         rblks = rmgr._slice_take_blocks_ax0(locs.indexer)
 
+        if left_ea:
+            assert len(locs) == 1, locs
+            assert len(rblks) == 1, rblks
+            assert rblks[0].shape[0] == 1, rblks[0].shape
+
         for k, rblk in enumerate(rblks):
             rvals = rblk.values
             right_ea = not isinstance(rvals, np.ndarray)
 
-            lvals = blk_vals[rblk.mgr_locs.indexer, :]
+            #lvals = blk_vals[rblk.mgr_locs.indexer, :]
 
             if not (left_ea or right_ea):
+                lvals = blk_vals[rblk.mgr_locs.indexer, :]
+                assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
+            elif left_ea and right_ea:
+                lvals = blk_vals
                 assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
-            #elif left_ea and right_ea:
-            #    assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
             elif right_ea:
                 # 1D EA
+                lvals = blk_vals[rblk.mgr_locs.indexer, :]
                 assert lvals.shape[0] == 1, lvals.shape
                 lvals = lvals[0, :]
             else:
-                assert False  # should be unreachable ATM
-                #assert rvals.shape[0] == 1, rvals.shape
-                #rvals = rvals[0, :]
+                lvals = blk_vals
+                assert rvals.shape[0] == 1, rvals.shape
+                rvals = rvals[0, :]
 
             res_values = array_op(lvals, rvals)
             nbs = rblk._split_op_result(res_values)
 
             # Debugging assertions
-            if right_ea:  # or left_ea
+            if right_ea or left_ea:
                 assert len(nbs) == 1
             else:
                 assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape)

From 89c3d7bdef4a1b89f14161ac4db3a87611aa3cec Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 25 Mar 2020 10:03:31 -0700
Subject: [PATCH 07/31] refactor

---
 pandas/core/ops/__init__.py | 75 ++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 43 deletions(-)

diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index 687d1b8bb7891..b285c5494ee71 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -4,7 +4,7 @@
 This is not a public API.
 """
 import operator
-from typing import TYPE_CHECKING, List, Optional, Set
+from typing import TYPE_CHECKING, List, Optional, Set, Tuple
 
 import numpy as np
 
@@ -297,34 +297,41 @@ def fill_binop(left, right, fill_value):
 def operate_blockwise(left, right, array_op):
     assert right._indexed_same(left)
 
+    def get_same_shape_values(
+        lblk: "Block", rblk: "Block", left_ea: bool, right_ea: bool
+    ) -> Tuple[ArrayLike, ArrayLike]:
+        """
+        Slice lblk.values to align with rblk.  Squeeze if we have EAs.
+        """
+        lvals = lblk.values
+        rvals = rblk.values
+
+        # TODO(EA2D): with 2D EAs pnly this first clause would be needed
+        if not (left_ea or right_ea):
+            lvals = lvals[rblk.mgr_locs.indexer, :]
+            assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
+        elif left_ea and right_ea:
+            assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
+        elif right_ea:
+            # lvals are 2D, rvals are 1D
+            lvals = lvals[rblk.mgr_locs.indexer, :]
+            assert lvals.shape[0] == 1, lvals.shape
+            lvals = lvals[0, :]
+        else:
+            # lvals are 1D, rvals are 2D
+            assert rvals.shape[0] == 1, rvals.shape
+            rvals = rvals[0, :]
+
+        return lvals, rvals
+
     res_blks: List["Block"] = []
     rmgr = right._data
     for n, blk in enumerate(left._data.blocks):
         locs = blk.mgr_locs
-
         blk_vals = blk.values
 
         left_ea = not isinstance(blk_vals, np.ndarray)
 
-        if False:#left_ea:
-            # 1D EA
-            assert len(locs) == 1, locs
-            rblks = rmgr._slice_take_blocks_ax0(locs.indexer)
-            assert len(rblks) == 1, rblks
-            rblk = rblks[0]
-            assert rblk.shape[0] == 1, rblk.shape
-
-            rvals = rblk.values
-            right_ea = not isinstance(rvals, np.ndarray)
-            if not right_ea:
-                assert rvals.shape[0] == 1, rvals.shape
-                rvals = rvals[0, :]
-            res_values = array_op(blk_vals, rvals)
-            nbs = blk._split_op_result(res_values)
-            # Setting nb.mgr_locs is unnecessary here, but harmless
-            res_blks.extend(nbs)
-            continue
-
         rblks = rmgr._slice_take_blocks_ax0(locs.indexer)
 
         if left_ea:
@@ -333,38 +340,20 @@ def operate_blockwise(left, right, array_op):
             assert rblks[0].shape[0] == 1, rblks[0].shape
 
         for k, rblk in enumerate(rblks):
-            rvals = rblk.values
-            right_ea = not isinstance(rvals, np.ndarray)
-
-            #lvals = blk_vals[rblk.mgr_locs.indexer, :]
-
-            if not (left_ea or right_ea):
-                lvals = blk_vals[rblk.mgr_locs.indexer, :]
-                assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
-            elif left_ea and right_ea:
-                lvals = blk_vals
-                assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
-            elif right_ea:
-                # 1D EA
-                lvals = blk_vals[rblk.mgr_locs.indexer, :]
-                assert lvals.shape[0] == 1, lvals.shape
-                lvals = lvals[0, :]
-            else:
-                lvals = blk_vals
-                assert rvals.shape[0] == 1, rvals.shape
-                rvals = rvals[0, :]
+            right_ea = not isinstance(rblk.values, np.ndarray)
+
+            lvals, rvals = get_same_shape_values(blk, rblk, left_ea, right_ea)
 
             res_values = array_op(lvals, rvals)
             nbs = rblk._split_op_result(res_values)
 
-            # Debugging assertions
             if right_ea or left_ea:
                 assert len(nbs) == 1
             else:
                 assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape)
 
             for nb in nbs:
-                # TODO: maybe optimize by sticking with slices?
+                # Reset mgr_locs to correspond to our original DataFrame
                 nblocs = locs.as_array[nb.mgr_locs.indexer]
                 nb.mgr_locs = nblocs
                 assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape)

From e348e464457556b1678c898c9cdc7366cd0b3877 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 25 Mar 2020 10:11:25 -0700
Subject: [PATCH 08/31] blackify

---
 pandas/tests/frame/test_arithmetic.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index 18cd2a4b0c90b..0b73583002d5c 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -1,7 +1,6 @@
 from collections import deque
 from datetime import datetime
 import operator
-import re
 
 import numpy as np
 import pytest
@@ -47,10 +46,11 @@ def check(df, df2):
                 )
                 tm.assert_frame_equal(result, expected)
 
-                msg = "|".join([
+                msgs = [
                     r"Invalid comparison between dtype=datetime64\[ns\] and ndarray",
                     "invalid type promotion",
-                    ])
+                ]
+                msg = "|".join(msgs)
                 with pytest.raises(TypeError, match=msg):
                     x >= y
                 with pytest.raises(TypeError, match=msg):

From 2b1ba182144759b68e1a7b4eb8f3bfca4f2a05fb Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 31 Mar 2020 14:44:03 -0700
Subject: [PATCH 09/31] disable assertions for perf

---
 pandas/core/ops/__init__.py           | 19 ++++++++++---------
 pandas/tests/frame/test_arithmetic.py |  1 +
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index b285c5494ee71..3911380c7f795 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -356,19 +356,20 @@ def get_same_shape_values(
                 # Reset mgr_locs to correspond to our original DataFrame
                 nblocs = locs.as_array[nb.mgr_locs.indexer]
                 nb.mgr_locs = nblocs
-                assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape)
-                assert all(x in locs.as_array for x in nb.mgr_locs.as_array)
+                # Assertions are disabled for performance, but should hold:
+                #  assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape)
+                #  assert all(x in locs.as_array for x in nb.mgr_locs.as_array)
 
             res_blks.extend(nbs)
 
-    slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array}
-    nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks)
-    assert nlocs == len(left.columns), (nlocs, len(left.columns))
-    assert len(slocs) == nlocs, (len(slocs), nlocs)
-    assert slocs == set(range(nlocs)), slocs
+    # Assertions are disabled for performance, but should hold:
+    #  slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array}
+    #  nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks)
+    #  assert nlocs == len(left.columns), (nlocs, len(left.columns))
+    #  assert len(slocs) == nlocs, (len(slocs), nlocs)
+    #  assert slocs == set(range(nlocs)), slocs
 
-    # TODO: once this is working, pass do_integrity_check=False
-    new_mgr = type(rmgr)(res_blks, axes=rmgr.axes)
+    new_mgr = type(rmgr)(res_blks, axes=rmgr.axes, do_integrity_check=False)
     return new_mgr
 
 
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index 01ea3fc8676de..a5d696be24d0a 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -1,6 +1,7 @@
 from collections import deque
 from datetime import datetime
 import operator
+import re
 
 import numpy as np
 import pytest

From 91c86a35072e87ec5fa16fb41a89f4c8c7cb65df Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 31 Mar 2020 21:05:01 -0700
Subject: [PATCH 10/31] asv

---
 asv_bench/benchmarks/arithmetic.py | 48 ++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py
index 5a8b109c21858..b53ef9505990d 100644
--- a/asv_bench/benchmarks/arithmetic.py
+++ b/asv_bench/benchmarks/arithmetic.py
@@ -80,6 +80,54 @@ def time_frame_op_with_series_axis0(self, opname):
         getattr(self.df, opname)(self.ser, axis=0)
 
 
+class FrameWithFrameWide:
+    # Many-columns, mixed dtypes
+
+    params = [
+        [
+            operator.add,
+            operator.sub,
+            operator.mul,
+            operator.truediv,
+            operator.floordiv,
+            operator.pow,
+            operator.mod,
+            operator.eq,
+            operator.ne,
+            operator.gt,
+            operator.ge,
+            operator.lt,
+            operator.le,
+        ]
+    ]
+    param_names = ["op"]
+
+    def setup(self, op):
+        # we choose dtypes so as to make the blocks
+        #  a) not perfectly match between right and left
+        #  b) appreciably bigger than single columns
+        arr = np.random.randn(10 ** 6).reshape(500, 2000).astype(np.float64)
+        df = pd.DataFrame(arr)
+        df[1000] = df[1000].astype(np.float32)
+        df.iloc[:, 1000:] = df.iloc[:, 1000:].astype(np.float32)
+
+        # TODO: GH#33198 the setting here shoudlnt need two steps
+        df2 = pd.DataFrame(arr)
+        df2[1000] = df2[1000].astype(np.int64)
+        df2.iloc[:, 500:1500] = df2.iloc[:, 500:1500].astype(np.int64)
+
+        self.left = df
+        self.right = df
+
+    def time_op_different_blocks(self, op):
+        # blocks (and dtypes) are not aligned
+        op(self.left, self.right)
+
+    def time_op_same_blocks(self, op):
+        # blocks (and dtypes) are aligned
+        op(self.left, self.left)
+
+
 class Ops:
 
     params = [[True, False], ["default", 1]]

From 2034084db2a999691bcf28f424fc520dadcdeead Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 1 Apr 2020 08:03:28 -0700
Subject: [PATCH 11/31] whatsnew

---
 doc/source/whatsnew/v1.1.0.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 25f847c698278..d424315f5e416 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -274,7 +274,8 @@ Performance improvements
   :meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32821`,
   :issue:`32825`,  :issue:`32826`, :issue:`32856`, :issue:`32858`).
 - Performance improvement in :meth:`Series.sum` for nullable (integer and boolean) dtypes (:issue:`30982`).
-
+- Performance improvement in arithmetic operations between two :class:`DataFrame` objects (:issue:`32779`)
+-
 
 .. ---------------------------------------------------------------------------
 

From 0c12d35e5cb4737fae982a8df1aa90fc9332c1d6 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 3 Apr 2020 10:14:00 -0700
Subject: [PATCH 12/31] revert warning suppression

---
 pandas/core/ops/array_ops.py | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
index f419c548b27c1..c17955457245b 100644
--- a/pandas/core/ops/array_ops.py
+++ b/pandas/core/ops/array_ops.py
@@ -6,7 +6,6 @@
 from functools import partial
 import operator
 from typing import Any, Optional, Tuple
-import warnings
 
 import numpy as np
 
@@ -159,17 +158,14 @@ def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = Fal
     """
     import pandas.core.computation.expressions as expressions
 
-    with warnings.catch_warnings():
-        # suppress warnings from numpy about element-wise comparison
-        warnings.simplefilter("ignore", DeprecationWarning)
-        try:
-            result = expressions.evaluate(op, str_rep, left, right)
-        except TypeError:
-            if is_cmp:
-                # numexpr failed on comparison op, e.g. ndarray[float] > datetime
-                #  In this case we do not fall back to the masked op, as that
-                #  will handle complex numbers incorrectly, see GH#32047
-                raise
+    try:
+        result = expressions.evaluate(op, str_rep, left, right)
+    except TypeError:
+        if is_cmp:
+            # numexpr failed on comparison op, e.g. ndarray[float] > datetime
+            #  In this case we do not fall back to the masked op, as that
+            #  will handle complex numbers incorrectly, see GH#32047
+            raise
             result = masked_arith_op(left, right, op)
 
     if is_cmp and (is_scalar(result) or result is NotImplemented):

From 9727562801ac6a1a98bd4ee348fe3666ec49e801 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 3 Apr 2020 10:51:49 -0700
Subject: [PATCH 13/31] Fixupm indentation

---
 pandas/core/ops/array_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
index c17955457245b..05ec48c206b3c 100644
--- a/pandas/core/ops/array_ops.py
+++ b/pandas/core/ops/array_ops.py
@@ -166,7 +166,7 @@ def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = Fal
             #  In this case we do not fall back to the masked op, as that
             #  will handle complex numbers incorrectly, see GH#32047
             raise
-            result = masked_arith_op(left, right, op)
+        result = masked_arith_op(left, right, op)
 
     if is_cmp and (is_scalar(result) or result is NotImplemented):
         # numpy returned a scalar instead of operating element-wise

From 42bbbf3c7ee0ea3d5352191c380972842bf0edd4 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sat, 4 Apr 2020 15:42:26 -0700
Subject: [PATCH 14/31] suppress warning

---
 pandas/core/ops/array_ops.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
index 05ec48c206b3c..efd0500d351f4 100644
--- a/pandas/core/ops/array_ops.py
+++ b/pandas/core/ops/array_ops.py
@@ -6,6 +6,7 @@
 from functools import partial
 import operator
 from typing import Any, Optional, Tuple
+import warnings
 
 import numpy as np
 
@@ -308,8 +309,13 @@ def comparison_op(
         res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
 
     else:
-        with np.errstate(all="ignore"):
-            res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep, is_cmp=True)
+        with warnings.catch_warnings():
+            # suppress warnings from numpy about element-wise comparison
+            warnings.simplefilter("ignore", DeprecationWarning)
+            with np.errstate(all="ignore"):
+                res_values = na_arithmetic_op(
+                    lvalues, rvalues, op, str_rep, is_cmp=True
+                )
 
     return res_values
 

From 0d958a3b6022cec977fc9b5f6fc802da62d2f284 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 6 Apr 2020 10:34:43 -0700
Subject: [PATCH 15/31] update asv

---
 asv_bench/benchmarks/arithmetic.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py
index 0473743dabf46..c5ad533ed82b1 100644
--- a/asv_bench/benchmarks/arithmetic.py
+++ b/asv_bench/benchmarks/arithmetic.py
@@ -127,11 +127,13 @@ def setup(self, op):
         df = pd.DataFrame(arr)
         df[1000] = df[1000].astype(np.float32)
         df.iloc[:, 1000:] = df.iloc[:, 1000:].astype(np.float32)
+        df._consolidate_inplace()
 
         # TODO: GH#33198 the setting here shoudlnt need two steps
         df2 = pd.DataFrame(arr)
         df2[1000] = df2[1000].astype(np.int64)
         df2.iloc[:, 500:1500] = df2.iloc[:, 500:1500].astype(np.int64)
+        df2._consolidate_inplace()
 
         self.left = df
         self.right = df

From 56eef516d9fc83d9dfa82c41b61c0d86d7aaf202 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 6 Apr 2020 11:09:41 -0700
Subject: [PATCH 16/31] _data->_mgr

---
 pandas/core/ops/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index bd12fe122bbf0..4d7f4fb026ae8 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -325,8 +325,8 @@ def get_same_shape_values(
         return lvals, rvals
 
     res_blks: List["Block"] = []
-    rmgr = right._data
-    for n, blk in enumerate(left._data.blocks):
+    rmgr = right._mgr
+    for n, blk in enumerate(left._mgr.blocks):
         locs = blk.mgr_locs
         blk_vals = blk.values
 

From ae744b74a082b7305c81b53a02f6ab3762afa2fd Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 9 Apr 2020 14:33:38 -0700
Subject: [PATCH 17/31] update to use faspath constructor

---
 pandas/core/frame.py        |  4 +++-
 pandas/core/ops/__init__.py | 19 +++++++++++--------
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index c3018861bce57..e7732265beea1 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -442,6 +442,7 @@ def __init__(
             mgr = self._init_mgr(
                 data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy
             )
+
         elif isinstance(data, dict):
             mgr = init_dict(data, index, columns, dtype=dtype)
         elif isinstance(data, ma.MaskedArray):
@@ -5453,10 +5454,11 @@ def _construct_result(self, result) -> "DataFrame":
         -------
         DataFrame
         """
-        out = self._constructor(result, index=self.index, copy=False)
+        out = self._constructor(result, copy=False)
         # Pin columns instead of passing to constructor for compat with
         #  non-unique columns case
         out.columns = self.columns
+        out.index = self.index
         return out
 
     def combine(
diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index 4d7f4fb026ae8..8bd6e4244e818 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -332,12 +332,14 @@ def get_same_shape_values(
 
         left_ea = not isinstance(blk_vals, np.ndarray)
 
+        # TODO: joris says this is costly, see if we can optimize
         rblks = rmgr._slice_take_blocks_ax0(locs.indexer)
 
-        if left_ea:
-            assert len(locs) == 1, locs
-            assert len(rblks) == 1, rblks
-            assert rblks[0].shape[0] == 1, rblks[0].shape
+        # Assertions are disabled for performance, but should hold:
+        # if left_ea:
+        #    assert len(locs) == 1, locs
+        #    assert len(rblks) == 1, rblks
+        #    assert rblks[0].shape[0] == 1, rblks[0].shape
 
         for k, rblk in enumerate(rblks):
             right_ea = not isinstance(rblk.values, np.ndarray)
@@ -347,10 +349,11 @@ def get_same_shape_values(
             res_values = array_op(lvals, rvals)
             nbs = rblk._split_op_result(res_values)
 
-            if right_ea or left_ea:
-                assert len(nbs) == 1
-            else:
-                assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape)
+            # Assertions are disabled for performance, but should hold:
+            # if right_ea or left_ea:
+            #    assert len(nbs) == 1
+            # else:
+            #    assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape)
 
             for nb in nbs:
                 # Reset mgr_locs to correspond to our original DataFrame

From f42c40334812f2765f19565d03e82e9263ce1039 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 10 Apr 2020 11:00:55 -0700
Subject: [PATCH 18/31] update import

---
 pandas/core/ops/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index fede06a8970b9..5f6bc230917a4 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -10,7 +10,7 @@
 
 from pandas._libs import lib
 from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op  # noqa:F401
-from pandas._typing import Level
+from pandas._typing import ArrayLike, Level
 from pandas.util._decorators import Appender
 
 from pandas.core.dtypes.common import is_list_like

From 8a2807efdc0e3bd14f44a8dc5efbd54226346647 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 10 Apr 2020 11:01:13 -0700
Subject: [PATCH 19/31] remove unused import

---
 pandas/core/internals/blocks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 75c935cdf2e60..80573f32b936e 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -8,7 +8,7 @@
 
 from pandas._libs import NaT, algos as libalgos, lib, writers
 import pandas._libs.internals as libinternals
-from pandas._libs.tslibs import Timedelta, conversion
+from pandas._libs.tslibs import conversion
 from pandas._libs.tslibs.timezones import tz_compare
 from pandas._typing import ArrayLike
 from pandas.util._validators import validate_bool_kwarg

From fd10fb6cf39619e9a0e66affa5865f6f7755e22f Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 10 Apr 2020 14:11:26 -0700
Subject: [PATCH 20/31] rebase compat

---
 pandas/core/ops/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index 5f6bc230917a4..6b703dd7f40aa 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -347,6 +347,8 @@ def get_same_shape_values(
             lvals, rvals = get_same_shape_values(blk, rblk, left_ea, right_ea)
 
             res_values = array_op(lvals, rvals)
+            if left_ea and not right_ea and hasattr(res_values, "reshape"):
+                res_values = res_values.reshape(1, -1)
             nbs = rblk._split_op_result(res_values)
 
             # Assertions are disabled for performance, but should hold:

From 7150e87363cc205f0df380c06812de1ca264d8c3 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sat, 11 Apr 2020 18:09:00 -0700
Subject: [PATCH 21/31] slice instead of take

---
 pandas/core/internals/managers.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index d1293974b776a..175f0b5332f12 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1322,6 +1322,7 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default):
                 if allow_fill and fill_value is None:
                     _, fill_value = maybe_promote(blk.dtype)
 
+                # TODO: Any cases where we can optimize this to slice?
                 return [
                     blk.take_nd(
                         slobj,
@@ -1369,11 +1370,18 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default):
                         blocks.append(newblk)
 
                 else:
-                    blocks.append(
-                        blk.take_nd(
-                            blklocs[mgr_locs.indexer], axis=0, new_mgr_locs=mgr_locs,
-                        )
-                    )
+                    taker = blklocs[mgr_locs.indexer]
+                    # TODO: taker.max() probably isnt the Technically Correct
+                    #  way of calling this?
+                    taker = lib.maybe_indices_to_slice(taker, taker.max())
+
+                    if isinstance(taker, slice):
+                        nb = blk.getitem_block(taker)
+                        nb.mgr_locs = mgr_locs
+                    else:
+                        # TODO: just use getitem_block anyway?
+                        nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs)
+                    blocks.append(nb)
 
         return blocks
 

From 0ca2125d85f1b68ad4bc03fc462278b5bb387700 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Tue, 14 Apr 2020 07:06:21 -0700
Subject: [PATCH 22/31] Dummy commit to force CI


From 2bfc30885b6e3a0bcf1d60ab8b0e731b90c1579e Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 15 Apr 2020 07:22:30 -0700
Subject: [PATCH 23/31] update call bound

---
 pandas/core/internals/managers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index bdeb7820d2762..12f7fd3decb43 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1371,9 +1371,9 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default):
 
                 else:
                     taker = blklocs[mgr_locs.indexer]
-                    # TODO: taker.max() probably isnt the Technically Correct
+                    # TODO: taker.max()+1 probably isnt the Technically Correct
                     #  way of calling this?
-                    taker = lib.maybe_indices_to_slice(taker, taker.max())
+                    taker = lib.maybe_indices_to_slice(taker, taker.max() + 1)
 
                     if isinstance(taker, slice):
                         nb = blk.getitem_block(taker)

From d5ad2a079c1e5ddd097f9bdff32c1ecf8dce890e Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 15 Apr 2020 11:49:57 -0700
Subject: [PATCH 24/31] update max_len

---
 pandas/core/internals/managers.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 12f7fd3decb43..089a12ff40bd9 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1322,7 +1322,6 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default):
                 if allow_fill and fill_value is None:
                     _, fill_value = maybe_promote(blk.dtype)
 
-                # TODO: Any cases where we can optimize this to slice?
                 return [
                     blk.take_nd(
                         slobj,
@@ -1371,13 +1370,11 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default):
 
                 else:
                     taker = blklocs[mgr_locs.indexer]
-                    # TODO: taker.max()+1 probably isnt the Technically Correct
-                    #  way of calling this?
-                    taker = lib.maybe_indices_to_slice(taker, taker.max() + 1)
+                    max_len = max(len(mgr_locs), taker.max() + 1)
+                    taker = lib.maybe_indices_to_slice(taker, max_len)
 
                     if isinstance(taker, slice):
-                        nb = blk.getitem_block(taker)
-                        nb.mgr_locs = mgr_locs
+                        nb = blk.getitem_block(taker, new_mgr_locs=mgr_locs)
                     else:
                         # TODO: just use getitem_block anyway?
                         nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs)

From 92007e8c508c5ca6b2b98cb52999268b5e2e8b44 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 15 Apr 2020 18:58:59 -0700
Subject: [PATCH 25/31] revert

---
 asv_bench/benchmarks/arithmetic.py            |  50 --------
 doc/source/whatsnew/v1.1.0.rst                |   3 +-
 pandas/_libs/tslibs/period.pyx                | 108 +++++++-----------
 pandas/core/arrays/datetimelike.py            |   7 +-
 pandas/core/frame.py                          |  25 ++--
 pandas/core/generic.py                        |  12 +-
 pandas/core/indexes/category.py               |   8 +-
 pandas/core/indexing.py                       |  32 ++++--
 pandas/core/internals/blocks.py               |  55 ---------
 pandas/core/internals/concat.py               |  19 ++-
 pandas/core/ops/__init__.py                   |  98 ++--------------
 pandas/core/ops/array_ops.py                  |  68 +----------
 pandas/tests/arithmetic/common.py             |   9 +-
 pandas/tests/arithmetic/test_datetime64.py    |   7 +-
 pandas/tests/extension/test_external_block.py |   6 -
 pandas/tests/frame/test_api.py                |  13 ++-
 pandas/tests/frame/test_arithmetic.py         |   8 +-
 pandas/tests/indexing/test_scalar.py          |  40 +++++++
 pandas/tests/internals/test_internals.py      | 107 +++++++++--------
 .../tests/resample/test_resampler_grouper.py  |  10 +-
 pandas/tests/series/test_api.py               |  13 ++-
 pandas/util/_decorators.py                    |   8 +-
 22 files changed, 261 insertions(+), 445 deletions(-)

diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py
index c5ad533ed82b1..2745db58e83e3 100644
--- a/asv_bench/benchmarks/arithmetic.py
+++ b/asv_bench/benchmarks/arithmetic.py
@@ -97,56 +97,6 @@ def time_frame_op_with_series_axis0(self, opname):
         getattr(self.df, opname)(self.ser, axis=0)
 
 
-class FrameWithFrameWide:
-    # Many-columns, mixed dtypes
-
-    params = [
-        [
-            operator.add,
-            operator.sub,
-            operator.mul,
-            operator.truediv,
-            operator.floordiv,
-            operator.pow,
-            operator.mod,
-            operator.eq,
-            operator.ne,
-            operator.gt,
-            operator.ge,
-            operator.lt,
-            operator.le,
-        ]
-    ]
-    param_names = ["op"]
-
-    def setup(self, op):
-        # we choose dtypes so as to make the blocks
-        #  a) not perfectly match between right and left
-        #  b) appreciably bigger than single columns
-        arr = np.random.randn(10 ** 6).reshape(500, 2000).astype(np.float64)
-        df = pd.DataFrame(arr)
-        df[1000] = df[1000].astype(np.float32)
-        df.iloc[:, 1000:] = df.iloc[:, 1000:].astype(np.float32)
-        df._consolidate_inplace()
-
-        # TODO: GH#33198 the setting here shoudlnt need two steps
-        df2 = pd.DataFrame(arr)
-        df2[1000] = df2[1000].astype(np.int64)
-        df2.iloc[:, 500:1500] = df2.iloc[:, 500:1500].astype(np.int64)
-        df2._consolidate_inplace()
-
-        self.left = df
-        self.right = df
-
-    def time_op_different_blocks(self, op):
-        # blocks (and dtypes) are not aligned
-        op(self.left, self.right)
-
-    def time_op_same_blocks(self, op):
-        # blocks (and dtypes) are aligned
-        op(self.left, self.left)
-
-
 class Ops:
 
     params = [[True, False], ["default", 1]]
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index c7b9d80890daf..82c43811c0444 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -379,7 +379,7 @@ Performance improvements
   :meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32821`,
   :issue:`32825`,  :issue:`32826`, :issue:`32856`, :issue:`32858`).
 - Performance improvement in reductions (sum, prod, min, max) for nullable (integer and boolean) dtypes (:issue:`30982`, :issue:`33261`, :issue:`33442`).
-- Performance improvement in arithmetic operations between two :class:`DataFrame` objects (:issue:`32779`)
+
 
 .. ---------------------------------------------------------------------------
 
@@ -467,6 +467,7 @@ Indexing
 - Bug in :meth:`DatetimeIndex.get_loc` raising ``KeyError`` with converted-integer key instead of the user-passed key (:issue:`31425`)
 - Bug in :meth:`Series.xs` incorrectly returning ``Timestamp`` instead of ``datetime64`` in some object-dtype cases (:issue:`31630`)
 - Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`)
+- Bug in :meth:`DataFrame.at` when either columns or index is non-unique (:issue:`33041`)
 - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on a object-dtype :class:`Index` that is not all-integers (:issue:`31905`)
 - Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)
 - Bug in :meth:`DataFrame.loc:` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`)
diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
index dd745f840d0ab..c4a7df0017619 100644
--- a/pandas/_libs/tslibs/period.pyx
+++ b/pandas/_libs/tslibs/period.pyx
@@ -806,24 +806,22 @@ cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil:
         return unix_date_to_week(unix_date, freq - FR_WK)
 
 
-cdef void get_date_info(int64_t ordinal, int freq,
-                        npy_datetimestruct *dts) nogil:
+cdef void get_date_info(int64_t ordinal, int freq, npy_datetimestruct *dts) nogil:
     cdef:
-        int64_t unix_date
-        double abstime
+        int64_t unix_date, nanos
+        npy_datetimestruct dts2
 
     unix_date = get_unix_date(ordinal, freq)
-    abstime = get_abs_time(freq, unix_date, ordinal)
-
-    while abstime < 0:
-        abstime += 86400
-        unix_date -= 1
+    nanos = get_time_nanos(freq, unix_date, ordinal)
 
-    while abstime >= 86400:
-        abstime -= 86400
-        unix_date += 1
+    pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, dts)
 
-    date_info_from_days_and_time(dts, unix_date, abstime)
+    dt64_to_dtstruct(nanos, &dts2)
+    dts.hour = dts2.hour
+    dts.min = dts2.min
+    dts.sec = dts2.sec
+    dts.us = dts2.us
+    dts.ps = dts2.ps
 
 
 cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil:
@@ -855,74 +853,50 @@ cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil:
 
 
 @cython.cdivision
-cdef void date_info_from_days_and_time(npy_datetimestruct *dts,
-                                       int64_t unix_date,
-                                       double abstime) nogil:
+cdef int64_t get_time_nanos(int freq, int64_t unix_date, int64_t ordinal) nogil:
     """
-    Set the instance's value using the given date and time.
+    Find the number of nanoseconds after midnight on the given unix_date
+    that the ordinal represents in the given frequency.
 
     Parameters
     ----------
-    dts : npy_datetimestruct*
+    freq : int
     unix_date : int64_t
-        days elapsed since datetime(1970, 1, 1)
-    abstime : double
-        seconds elapsed since beginning of day described by unix_date
+    ordinal : int64_t
 
-    Notes
-    -----
-    Updates dts inplace
+    Returns
+    -------
+    int64_t
     """
     cdef:
-        int inttime
-        int hour, minute
-        double second, subsecond_fraction
-
-    # Bounds check
-    # The calling function is responsible for ensuring that
-    # abstime >= 0.0 and abstime <= 86400
-
-    # Calculate the date
-    pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, dts)
+        int64_t sub, factor
 
-    # Calculate the time
-    inttime = <int>abstime
-    hour = inttime / 3600
-    minute = (inttime % 3600) / 60
-    second = abstime - <double>(hour * 3600 + minute * 60)
+    freq = get_freq_group(freq)
 
-    dts.hour = hour
-    dts.min = minute
-    dts.sec = <int>second
-
-    subsecond_fraction = second - dts.sec
-    dts.us = int((subsecond_fraction) * 1e6)
-    dts.ps = int(((subsecond_fraction) * 1e6 - dts.us) * 1e6)
+    if freq <= FR_DAY:
+        return 0
 
+    elif freq == FR_NS:
+        factor = 1
 
-@cython.cdivision
-cdef double get_abs_time(int freq, int64_t unix_date, int64_t ordinal) nogil:
-    cdef:
-        int freq_index, day_index, base_index
-        int64_t per_day, start_ord
-        double unit, result
+    elif freq == FR_US:
+        factor = 10**3
 
-    if freq <= FR_DAY:
-        return 0
+    elif freq == FR_MS:
+        factor = 10**6
 
-    freq_index = freq // 1000
-    day_index = FR_DAY // 1000
-    base_index = FR_SEC // 1000
+    elif freq == FR_SEC:
+        factor = 10 **9
 
-    per_day = get_daytime_conversion_factor(day_index, freq_index)
-    unit = get_daytime_conversion_factor(freq_index, base_index)
+    elif freq == FR_MIN:
+        factor = 10**9 * 60
 
-    if base_index < freq_index:
-        unit = 1 / unit
+    else:
+        # We must have freq == FR_HR
+        factor = 10**9 * 3600
 
-    start_ord = unix_date * per_day
-    result = <double>(unit * (ordinal - start_ord))
-    return result
+    sub = ordinal - unix_date * 24 * 3600 * 10**9 / factor
+    return sub * factor
 
 
 cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year):
@@ -1176,11 +1150,7 @@ cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1:
     if ordinal == NPY_NAT:
         return NPY_NAT
 
-    if freq == 11000:
-        # Microsecond, avoid get_date_info to prevent floating point errors
-        pandas_datetime_to_datetimestruct(ordinal, NPY_FR_us, &dts)
-    else:
-        get_date_info(ordinal, freq, &dts)
+    get_date_info(ordinal, freq, &dts)
 
     check_dts_bounds(&dts)
     return dtstruct_to_dt64(&dts)
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 7384dfd20ebfc..ece92acae6461 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1291,12 +1291,11 @@ def _addsub_object_array(self, other: np.ndarray, op):
         result : same class as self
         """
         assert op in [operator.add, operator.sub]
-        if len(other) == 1 and self.ndim == other.ndim == 1:
-            # If both 1D then broadcasting is unambiguous
+        if len(other) == 1:
             return op(self, other[0])
 
         warnings.warn(
-            "Adding/subtracting object-dtype array to "
+            "Adding/subtracting array of DateOffsets to "
             f"{type(self).__name__} not vectorized",
             PerformanceWarning,
         )
@@ -1304,7 +1303,7 @@ def _addsub_object_array(self, other: np.ndarray, op):
         # Caller is responsible for broadcasting if necessary
         assert self.shape == other.shape, (self.shape, other.shape)
 
-        res_values = op(self.astype("O"), np.asarray(other))
+        res_values = op(self.astype("O"), np.array(other))
         result = array(res_values.ravel())
         result = extract_array(result, extract_numpy=True).reshape(self.shape)
         return result
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 66a728c8ef208..85bb47485a2e7 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -452,7 +452,6 @@ def __init__(
             mgr = self._init_mgr(
                 data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy
             )
-
         elif isinstance(data, dict):
             mgr = init_dict(data, index, columns, dtype=dtype)
         elif isinstance(data, ma.MaskedArray):
@@ -2587,7 +2586,7 @@ def _ixs(self, i: int, axis: int = 0):
             label = self.columns[i]
 
             values = self._mgr.iget(i)
-            result = self._box_col_values(values, label)
+            result = self._box_col_values(values, i)
 
             # this is a cached value, mark it so
             result._set_as_cached(label, self)
@@ -2692,7 +2691,7 @@ def _getitem_bool_array(self, key):
     def _getitem_multilevel(self, key):
         # self.columns is a MultiIndex
         loc = self.columns.get_loc(key)
-        if isinstance(loc, (slice, Series, np.ndarray, Index)):
+        if isinstance(loc, (slice, np.ndarray)):
             new_columns = self.columns[loc]
             result_columns = maybe_droplevels(new_columns, key)
             if self._is_mixed_type:
@@ -2725,7 +2724,8 @@ def _getitem_multilevel(self, key):
             result._set_is_copy(self)
             return result
         else:
-            return self._get_item_cache(key)
+            # loc is neither a slice nor ndarray, so must be an int
+            return self._ixs(loc, axis=1)
 
     def _get_value(self, index, col, takeable: bool = False):
         """
@@ -2916,19 +2916,15 @@ def _ensure_valid_index(self, value):
                 value.index.copy(), axis=1, fill_value=np.nan
             )
 
-    def _box_item_values(self, key, values):
-        items = self.columns[self.columns.get_loc(key)]
-        if values.ndim == 2:
-            return self._constructor(values.T, columns=items, index=self.index)
-        else:
-            return self._box_col_values(values, items)
-
-    def _box_col_values(self, values, items):
+    def _box_col_values(self, values, loc: int) -> Series:
         """
         Provide boxed values for a column.
         """
+        # Lookup in columns so that if e.g. a str datetime was passed
+        #  we attach the Timestamp object as the name.
+        name = self.columns[loc]
         klass = self._constructor_sliced
-        return klass(values, index=self.index, name=items, fastpath=True)
+        return klass(values, index=self.index, name=name, fastpath=True)
 
     # ----------------------------------------------------------------------
     # Unsorted
@@ -5536,11 +5532,10 @@ def _construct_result(self, result) -> "DataFrame":
         -------
         DataFrame
         """
-        out = self._constructor(result, copy=False)
+        out = self._constructor(result, index=self.index, copy=False)
         # Pin columns instead of passing to constructor for compat with
         #  non-unique columns case
         out.columns = self.columns
-        out.index = self.index
         return out
 
     def combine(
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 6a4f83427310e..2878204f5ee79 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3558,8 +3558,13 @@ def _get_item_cache(self, item):
         cache = self._item_cache
         res = cache.get(item)
         if res is None:
-            values = self._mgr.get(item)
-            res = self._box_item_values(item, values)
+            # All places that call _get_item_cache have unique columns,
+            #  pending resolution of GH#33047
+
+            loc = self.columns.get_loc(item)
+            values = self._mgr.iget(loc)
+            res = self._box_col_values(values, loc)
+
             cache[item] = res
             res._set_as_cached(item, self)
 
@@ -3567,9 +3572,6 @@ def _get_item_cache(self, item):
             res._is_copy = self._is_copy
         return res
 
-    def _box_item_values(self, key, values):
-        raise AbstractMethodError(self)
-
     def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries:
         """
         Construct a slice of this container.
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 635bf32639075..ba1a9a4e08fa0 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -25,6 +25,7 @@
 from pandas.core.algorithms import take_1d
 from pandas.core.arrays.categorical import Categorical, contains, recode_for_categories
 import pandas.core.common as com
+from pandas.core.construction import extract_array
 import pandas.core.indexes.base as ibase
 from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name
 from pandas.core.indexes.extension import ExtensionIndex, inherit_names
@@ -198,8 +199,13 @@ def __new__(
                 data = []
 
         assert isinstance(dtype, CategoricalDtype), dtype
-        if not isinstance(data, Categorical) or data.dtype != dtype:
+        data = extract_array(data, extract_numpy=True)
+
+        if not isinstance(data, Categorical):
             data = Categorical(data, dtype=dtype)
+        elif isinstance(dtype, CategoricalDtype) and dtype != data.dtype:
+            # we want to silently ignore dtype='category'
+            data = data._set_dtype(dtype)
 
         data = data.copy() if copy else data
 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index b74399ed86fbd..dd072cf00ed20 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -2045,6 +2045,7 @@ def __setitem__(self, key, value):
             key = _tuplify(self.ndim, key)
         if len(key) != self.ndim:
             raise ValueError("Not enough indexers for scalar access (setting)!")
+
         key = list(self._convert_key(key, is_setter=True))
         self.obj._set_value(*key, value=value, takeable=self._takeable)
 
@@ -2064,15 +2065,32 @@ def _convert_key(self, key, is_setter: bool = False):
 
         return key
 
+    @property
+    def _axes_are_unique(self) -> bool:
+        # Only relevant for self.ndim == 2
+        assert self.ndim == 2
+        return self.obj.index.is_unique and self.obj.columns.is_unique
+
     def __getitem__(self, key):
-        if self.ndim != 1 or not is_scalar(key):
-            # FIXME: is_scalar check is a kludge
-            return super().__getitem__(key)
 
-        # Like Index.get_value, but we do not allow positional fallback
-        obj = self.obj
-        loc = obj.index.get_loc(key)
-        return obj.index._get_values_for_loc(obj, loc, key)
+        if self.ndim == 2 and not self._axes_are_unique:
+            # GH#33041 fall back to .loc
+            if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
+                raise ValueError("Invalid call for scalar access (getting)!")
+            return self.obj.loc[key]
+
+        return super().__getitem__(key)
+
+    def __setitem__(self, key, value):
+        if self.ndim == 2 and not self._axes_are_unique:
+            # GH#33041 fall back to .loc
+            if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
+                raise ValueError("Invalid call for scalar access (setting)!")
+
+            self.obj.loc[key] = value
+            return
+
+        return super().__setitem__(key, value)
 
 
 @doc(IndexingMixin.iat)
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 185b0f4da2627..ebdc331a673ab 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -48,7 +48,6 @@
     is_timedelta64_dtype,
     pandas_dtype,
 )
-from pandas.core.dtypes.concat import concat_categorical, concat_datetime
 from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
@@ -110,7 +109,6 @@ class Block(PandasObject):
     _can_consolidate = True
     _verify_integrity = True
     _validate_ndim = True
-    _concatenator = staticmethod(np.concatenate)
 
     def __init__(self, values, placement, ndim=None):
         self.ndim = self._check_ndim(values, ndim)
@@ -309,16 +307,6 @@ def shape(self):
     def dtype(self):
         return self.values.dtype
 
-    def concat_same_type(self, to_concat):
-        """
-        Concatenate list of single blocks of the same type.
-        """
-        values = self._concatenator(
-            [blk.values for blk in to_concat], axis=self.ndim - 1
-        )
-        placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
-        return self.make_block_same_class(values, placement=placement)
-
     def iget(self, i):
         return self.values[i]
 
@@ -1770,14 +1758,6 @@ def _slice(self, slicer):
 
         return self.values[slicer]
 
-    def concat_same_type(self, to_concat):
-        """
-        Concatenate list of single blocks of the same type.
-        """
-        values = self._holder._concat_same_type([blk.values for blk in to_concat])
-        placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
-        return self.make_block_same_class(values, placement=placement)
-
     def fillna(self, value, limit=None, inplace=False, downcast=None):
         values = self.values if inplace else self.values.copy()
         values = values.fillna(value=value, limit=limit)
@@ -2258,20 +2238,6 @@ def diff(self, n: int, axis: int = 0) -> List["Block"]:
         new_values = new_values.astype("timedelta64[ns]")
         return [TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer)]
 
-    def concat_same_type(self, to_concat):
-        # need to handle concat([tz1, tz2]) here, since DatetimeArray
-        # only handles cases where all the tzs are the same.
-        # Instead of placing the condition here, it could also go into the
-        # is_uniform_join_units check, but I'm not sure what is better.
-        if len({x.dtype for x in to_concat}) > 1:
-            values = concat_datetime([x.values for x in to_concat])
-
-            values = values.astype(object, copy=False)
-            placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
-
-            return self.make_block(values, placement=placement)
-        return super().concat_same_type(to_concat)
-
     def fillna(self, value, limit=None, inplace=False, downcast=None):
         # We support filling a DatetimeTZ with a `value` whose timezone
         # is different by coercing to object.
@@ -2642,7 +2608,6 @@ class CategoricalBlock(ExtensionBlock):
     is_categorical = True
     _verify_integrity = True
     _can_hold_na = True
-    _concatenator = staticmethod(concat_categorical)
 
     should_store = Block.should_store
 
@@ -2656,26 +2621,6 @@ def __init__(self, values, placement, ndim=None):
     def _holder(self):
         return Categorical
 
-    def concat_same_type(self, to_concat):
-        """
-        Concatenate list of single blocks of the same type.
-
-        Note that this CategoricalBlock._concat_same_type *may* not
-        return a CategoricalBlock. When the categories in `to_concat`
-        differ, this will return an object ndarray.
-
-        If / when we decide we don't like that behavior:
-
-        1. Change Categorical._concat_same_type to use union_categoricals
-        2. Delete this method.
-        """
-        values = self._concatenator(
-            [blk.values for blk in to_concat], axis=self.ndim - 1
-        )
-        placement = self.mgr_locs if self.ndim == 2 else slice(len(values))
-        # not using self.make_block_same_class as values can be object dtype
-        return self.make_block(values, placement=placement)
-
     def replace(
         self,
         to_replace,
diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index 720e6799a3bf3..37e081aeba3f6 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -1,6 +1,7 @@
 # TODO: Needs a better name; too many modules are already called "concat"
 from collections import defaultdict
 import copy
+from typing import List
 
 import numpy as np
 
@@ -61,8 +62,18 @@ def concatenate_block_managers(
                 values = values.view()
             b = b.make_block_same_class(values, placement=placement)
         elif _is_uniform_join_units(join_units):
-            b = join_units[0].block.concat_same_type([ju.block for ju in join_units])
-            b.mgr_locs = placement
+            blk = join_units[0].block
+            vals = [ju.block.values for ju in join_units]
+
+            if not blk.is_extension or blk.is_datetimetz or blk.is_categorical:
+                # datetimetz and categorical can have the same type but multiple
+                #  dtypes, concatting does not necessarily preserve dtype
+                values = concat_compat(vals, axis=blk.ndim - 1)
+            else:
+                # TODO(EA2D): special-casing not needed with 2D EAs
+                values = concat_compat(vals)
+
+            b = make_block(values, placement=placement, ndim=blk.ndim)
         else:
             b = make_block(
                 _concatenate_join_units(join_units, concat_axis, copy=copy),
@@ -419,13 +430,15 @@ def _get_empty_dtype_and_na(join_units):
     raise AssertionError(msg)
 
 
-def _is_uniform_join_units(join_units) -> bool:
+def _is_uniform_join_units(join_units: List[JoinUnit]) -> bool:
     """
     Check if the join units consist of blocks of uniform type that can
     be concatenated using Block.concat_same_type instead of the generic
     _concatenate_join_units (which uses `concat_compat`).
 
     """
+    # TODO: require dtype match in addition to same type?  e.g. DatetimeTZBlock
+    #  cannot necessarily join
     return (
         # all blocks need to have the same type
         all(type(ju.block) is type(join_units[0].block) for ju in join_units)
diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index 6b703dd7f40aa..9a7c9fdadf90d 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -4,13 +4,13 @@
 This is not a public API.
 """
 import operator
-from typing import TYPE_CHECKING, List, Optional, Set, Tuple
+from typing import TYPE_CHECKING, Optional, Set
 
 import numpy as np
 
 from pandas._libs import lib
 from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op  # noqa:F401
-from pandas._typing import ArrayLike, Level
+from pandas._typing import Level
 from pandas.util._decorators import Appender
 
 from pandas.core.dtypes.common import is_list_like
@@ -57,7 +57,6 @@
 
 if TYPE_CHECKING:
     from pandas import DataFrame  # noqa:F401
-    from pandas.core.internals.blocks import Block  # noqa: F401
 
 # -----------------------------------------------------------------------------
 # constants
@@ -294,90 +293,6 @@ def fill_binop(left, right, fill_value):
 # Dispatch logic
 
 
-def operate_blockwise(left, right, array_op):
-    assert right._indexed_same(left)
-
-    def get_same_shape_values(
-        lblk: "Block", rblk: "Block", left_ea: bool, right_ea: bool
-    ) -> Tuple[ArrayLike, ArrayLike]:
-        """
-        Slice lblk.values to align with rblk.  Squeeze if we have EAs.
-        """
-        lvals = lblk.values
-        rvals = rblk.values
-
-        # TODO(EA2D): with 2D EAs pnly this first clause would be needed
-        if not (left_ea or right_ea):
-            lvals = lvals[rblk.mgr_locs.indexer, :]
-            assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
-        elif left_ea and right_ea:
-            assert lvals.shape == rvals.shape, (lvals.shape, rvals.shape)
-        elif right_ea:
-            # lvals are 2D, rvals are 1D
-            lvals = lvals[rblk.mgr_locs.indexer, :]
-            assert lvals.shape[0] == 1, lvals.shape
-            lvals = lvals[0, :]
-        else:
-            # lvals are 1D, rvals are 2D
-            assert rvals.shape[0] == 1, rvals.shape
-            rvals = rvals[0, :]
-
-        return lvals, rvals
-
-    res_blks: List["Block"] = []
-    rmgr = right._mgr
-    for n, blk in enumerate(left._mgr.blocks):
-        locs = blk.mgr_locs
-        blk_vals = blk.values
-
-        left_ea = not isinstance(blk_vals, np.ndarray)
-
-        # TODO: joris says this is costly, see if we can optimize
-        rblks = rmgr._slice_take_blocks_ax0(locs.indexer)
-
-        # Assertions are disabled for performance, but should hold:
-        # if left_ea:
-        #    assert len(locs) == 1, locs
-        #    assert len(rblks) == 1, rblks
-        #    assert rblks[0].shape[0] == 1, rblks[0].shape
-
-        for k, rblk in enumerate(rblks):
-            right_ea = not isinstance(rblk.values, np.ndarray)
-
-            lvals, rvals = get_same_shape_values(blk, rblk, left_ea, right_ea)
-
-            res_values = array_op(lvals, rvals)
-            if left_ea and not right_ea and hasattr(res_values, "reshape"):
-                res_values = res_values.reshape(1, -1)
-            nbs = rblk._split_op_result(res_values)
-
-            # Assertions are disabled for performance, but should hold:
-            # if right_ea or left_ea:
-            #    assert len(nbs) == 1
-            # else:
-            #    assert res_values.shape == lvals.shape, (res_values.shape, lvals.shape)
-
-            for nb in nbs:
-                # Reset mgr_locs to correspond to our original DataFrame
-                nblocs = locs.as_array[nb.mgr_locs.indexer]
-                nb.mgr_locs = nblocs
-                # Assertions are disabled for performance, but should hold:
-                #  assert len(nblocs) == nb.shape[0], (len(nblocs), nb.shape)
-                #  assert all(x in locs.as_array for x in nb.mgr_locs.as_array)
-
-            res_blks.extend(nbs)
-
-    # Assertions are disabled for performance, but should hold:
-    #  slocs = {y for nb in res_blks for y in nb.mgr_locs.as_array}
-    #  nlocs = sum(len(nb.mgr_locs.as_array) for nb in res_blks)
-    #  assert nlocs == len(left.columns), (nlocs, len(left.columns))
-    #  assert len(slocs) == nlocs, (len(slocs), nlocs)
-    #  assert slocs == set(range(nlocs)), slocs
-
-    new_mgr = type(rmgr)(res_blks, axes=rmgr.axes, do_integrity_check=False)
-    return new_mgr
-
-
 def dispatch_to_series(left, right, func, str_rep=None, axis=None):
     """
     Evaluate the frame operation func(left, right) by evaluating
@@ -410,9 +325,8 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None):
     elif isinstance(right, ABCDataFrame):
         assert right._indexed_same(left)
 
-        array_op = get_array_op(func, str_rep=str_rep)
-        bm = operate_blockwise(left, right, array_op)
-        return type(left)(bm)
+        def column_op(a, b):
+            return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))}
 
     elif isinstance(right, ABCSeries) and axis == "columns":
         # We only get here if called via _combine_series_frame,
@@ -596,11 +510,13 @@ def _combine_series_frame(left, right, func, axis: int, str_rep: str):
     if axis == 0:
         values = right._values
         if isinstance(values, np.ndarray):
+            # TODO(EA2D): no need to special-case with 2D EAs
             # We can operate block-wise
             values = values.reshape(-1, 1)
+            values = np.broadcast_to(values, left.shape)
 
             array_op = get_array_op(func, str_rep=str_rep)
-            bm = left._mgr.apply(array_op, right=values.T)
+            bm = left._mgr.apply(array_op, right=values.T, align_keys=["right"])
             return type(left)(bm)
 
         new_data = dispatch_to_series(left, right, func)
diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py
index efd0500d351f4..a1d853e38e757 100644
--- a/pandas/core/ops/array_ops.py
+++ b/pandas/core/ops/array_ops.py
@@ -6,7 +6,6 @@
 from functools import partial
 import operator
 from typing import Any, Optional, Tuple
-import warnings
 
 import numpy as np
 
@@ -76,14 +75,7 @@ def masked_arith_op(x: np.ndarray, y, op):
         result = np.empty(x.size, dtype=dtype)
 
         if len(x) != len(y):
-            if not _can_broadcast(x, y):
-                raise ValueError(x.shape, y.shape)
-
-            # Call notna on pre-broadcasted y for performance
-            ymask = notna(y)
-            y = np.broadcast_to(y, x.shape)
-            ymask = np.broadcast_to(ymask, x.shape)
-
+            raise ValueError(x.shape, y.shape)
         else:
             ymask = notna(y)
 
@@ -128,7 +120,7 @@ def masked_arith_op(x: np.ndarray, y, op):
     return result
 
 
-def define_na_arithmetic_op(op, str_rep: Optional[str]):
+def define_na_arithmetic_op(op, str_rep: str):
     def na_op(x, y):
         return na_arithmetic_op(x, y, op, str_rep)
 
@@ -212,51 +204,6 @@ def arithmetic_op(left: ArrayLike, right: Any, op, str_rep: str):
     return res_values
 
 
-def _broadcast_comparison_op(lvalues, rvalues, op) -> np.ndarray:
-    """
-    Broadcast a comparison operation between two 2D arrays.
-
-    Parameters
-    ----------
-    lvalues : np.ndarray or ExtensionArray
-    rvalues : np.ndarray or ExtensionArray
-
-    Returns
-    -------
-    np.ndarray[bool]
-    """
-    if isinstance(rvalues, np.ndarray):
-        rvalues = np.broadcast_to(rvalues, lvalues.shape)
-        result = comparison_op(lvalues, rvalues, op)
-    else:
-        result = np.empty(lvalues.shape, dtype=bool)
-        for i in range(len(lvalues)):
-            result[i, :] = comparison_op(lvalues[i], rvalues[:, 0], op)
-    return result
-
-
-def _can_broadcast(lvalues, rvalues) -> bool:
-    """
-    Check if we can broadcast rvalues to match the shape of lvalues.
-
-    Parameters
-    ----------
-    lvalues : np.ndarray or ExtensionArray
-    rvalues : np.ndarray or ExtensionArray
-
-    Returns
-    -------
-    bool
-    """
-    # We assume that lengths dont match
-    if lvalues.ndim == rvalues.ndim == 2:
-        # See if we can broadcast unambiguously
-        if lvalues.shape[1] == rvalues.shape[-1]:
-            if rvalues.shape[0] == 1:
-                return True
-    return False
-
-
 def comparison_op(
     left: ArrayLike, right: Any, op, str_rep: Optional[str] = None,
 ) -> ArrayLike:
@@ -288,8 +235,6 @@ def comparison_op(
         #  We are not catching all listlikes here (e.g. frozenset, tuple)
         #  The ambiguous case is object-dtype.  See GH#27803
         if len(lvalues) != len(rvalues):
-            if _can_broadcast(lvalues, rvalues):
-                return _broadcast_comparison_op(lvalues, rvalues, op)
             raise ValueError(
                 "Lengths must match to compare", lvalues.shape, rvalues.shape
             )
@@ -309,13 +254,8 @@ def comparison_op(
         res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
 
     else:
-        with warnings.catch_warnings():
-            # suppress warnings from numpy about element-wise comparison
-            warnings.simplefilter("ignore", DeprecationWarning)
-            with np.errstate(all="ignore"):
-                res_values = na_arithmetic_op(
-                    lvalues, rvalues, op, str_rep, is_cmp=True
-                )
+        with np.errstate(all="ignore"):
+            res_values = na_arithmetic_op(lvalues, rvalues, op, str_rep, is_cmp=True)
 
     return res_values
 
diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py
index 755fbd0d9036c..ccc49adc5da82 100644
--- a/pandas/tests/arithmetic/common.py
+++ b/pandas/tests/arithmetic/common.py
@@ -70,14 +70,7 @@ def assert_invalid_comparison(left, right, box):
     result = right != left
     tm.assert_equal(result, ~expected)
 
-    msg = "|".join(
-        [
-            "Invalid comparison between",
-            "Cannot compare type",
-            "not supported between",
-            "invalid type promotion",
-        ]
-    )
+    msg = "Invalid comparison between|Cannot compare type|not supported between"
     with pytest.raises(TypeError, match=msg):
         left < right
     with pytest.raises(TypeError, match=msg):
diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py
index db2ac092a5750..56c5647d865d3 100644
--- a/pandas/tests/arithmetic/test_datetime64.py
+++ b/pandas/tests/arithmetic/test_datetime64.py
@@ -964,9 +964,7 @@ def test_dt64arr_sub_dt64object_array(self, box_with_array, tz_naive_fixture):
         obj = tm.box_expected(dti, box_with_array)
         expected = tm.box_expected(expected, box_with_array)
 
-        warn = None
-        if box_with_array is not pd.DataFrame or tz_naive_fixture is None:
-            warn = PerformanceWarning
+        warn = PerformanceWarning if box_with_array is not pd.DataFrame else None
         with tm.assert_produces_warning(warn):
             result = obj - obj.astype(object)
         tm.assert_equal(result, expected)
@@ -1446,7 +1444,8 @@ def test_dt64arr_add_mixed_offset_array(self, box_with_array):
         s = DatetimeIndex([Timestamp("2000-1-1"), Timestamp("2000-2-1")])
         s = tm.box_expected(s, box_with_array)
 
-        with tm.assert_produces_warning(PerformanceWarning):
+        warn = None if box_with_array is pd.DataFrame else PerformanceWarning
+        with tm.assert_produces_warning(warn):
             other = pd.Index([pd.offsets.DateOffset(years=1), pd.offsets.MonthEnd()])
             other = tm.box_expected(other, box_with_array)
             result = s + other
diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py
index 9925fd51561ae..1843126898f3d 100644
--- a/pandas/tests/extension/test_external_block.py
+++ b/pandas/tests/extension/test_external_block.py
@@ -32,12 +32,6 @@ def df():
     return pd.DataFrame(block_manager)
 
 
-def test_concat_dataframe(df):
-    # GH17728
-    res = pd.concat([df, df])
-    assert isinstance(res._mgr.blocks[1], CustomBlock)
-
-
 def test_concat_axis1(df):
     # GH17954
     df2 = pd.DataFrame({"c": [0.1, 0.2, 0.3]})
diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
index 4149485be181d..ec8613faaa663 100644
--- a/pandas/tests/frame/test_api.py
+++ b/pandas/tests/frame/test_api.py
@@ -529,7 +529,18 @@ async def test_tab_complete_warning(self, ip):
 
         code = "import pandas as pd; df = pd.DataFrame()"
         await ip.run_code(code)
-        with tm.assert_produces_warning(None):
+
+        # TODO: remove it when Ipython updates
+        # GH 33567, jedi version raises Deprecation warning in Ipython
+        import jedi
+
+        if jedi.__version__ < "0.17.0":
+            warning = tm.assert_produces_warning(None)
+        else:
+            warning = tm.assert_produces_warning(
+                DeprecationWarning, check_stacklevel=False
+            )
+        with warning:
             with provisionalcompleter("ignore"):
                 list(ip.Completer.completions("df.", 1))
 
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
index 11dbeb93eee37..d929d3e030508 100644
--- a/pandas/tests/frame/test_arithmetic.py
+++ b/pandas/tests/frame/test_arithmetic.py
@@ -49,11 +49,9 @@ def check(df, df2):
                 )
                 tm.assert_frame_equal(result, expected)
 
-                msgs = [
-                    r"Invalid comparison between dtype=datetime64\[ns\] and ndarray",
-                    "invalid type promotion",
-                ]
-                msg = "|".join(msgs)
+                msg = re.escape(
+                    "Invalid comparison between dtype=datetime64[ns] and ndarray"
+                )
                 with pytest.raises(TypeError, match=msg):
                     x >= y
                 with pytest.raises(TypeError, match=msg):
diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py
index 61d109344568c..216d554e22b49 100644
--- a/pandas/tests/indexing/test_scalar.py
+++ b/pandas/tests/indexing/test_scalar.py
@@ -128,6 +128,46 @@ def test_imethods_with_dups(self):
         result = df.iat[2, 0]
         assert result == 2
 
+    def test_frame_at_with_duplicate_axes(self):
+        # GH#33041
+        arr = np.random.randn(6).reshape(3, 2)
+        df = DataFrame(arr, columns=["A", "A"])
+
+        result = df.at[0, "A"]
+        expected = df.iloc[0]
+
+        tm.assert_series_equal(result, expected)
+
+        result = df.T.at["A", 0]
+        tm.assert_series_equal(result, expected)
+
+        # setter
+        df.at[1, "A"] = 2
+        expected = Series([2.0, 2.0], index=["A", "A"], name=1)
+        tm.assert_series_equal(df.iloc[1], expected)
+
+    def test_frame_at_with_duplicate_axes_requires_scalar_lookup(self):
+        # GH#33041 check that falling back to loc doesn't allow non-scalar
+        #  args to slip in
+
+        arr = np.random.randn(6).reshape(3, 2)
+        df = DataFrame(arr, columns=["A", "A"])
+
+        msg = "Invalid call for scalar access"
+        with pytest.raises(ValueError, match=msg):
+            df.at[[1, 2]]
+        with pytest.raises(ValueError, match=msg):
+            df.at[1, ["A"]]
+        with pytest.raises(ValueError, match=msg):
+            df.at[:, "A"]
+
+        with pytest.raises(ValueError, match=msg):
+            df.at[[1, 2]] = 1
+        with pytest.raises(ValueError, match=msg):
+            df.at[1, ["A"]] = 1
+        with pytest.raises(ValueError, match=msg):
+            df.at[:, "A"] = 1
+
     def test_series_at_raises_type_error(self):
         # at should not fallback
         # GH 7814
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index 58a84f5a267bc..f1d4c865a0ced 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -329,45 +329,48 @@ def test_categorical_block_pickle(self):
         smgr2 = tm.round_trip_pickle(smgr)
         tm.assert_series_equal(Series(smgr), Series(smgr2))
 
-    def test_get(self):
+    def test_iget(self):
         cols = Index(list("abc"))
         values = np.random.rand(3, 3)
         block = make_block(values=values.copy(), placement=np.arange(3))
         mgr = BlockManager(blocks=[block], axes=[cols, np.arange(3)])
 
-        tm.assert_almost_equal(mgr.get("a").internal_values(), values[0])
-        tm.assert_almost_equal(mgr.get("b").internal_values(), values[1])
-        tm.assert_almost_equal(mgr.get("c").internal_values(), values[2])
+        tm.assert_almost_equal(mgr.iget(0).internal_values(), values[0])
+        tm.assert_almost_equal(mgr.iget(1).internal_values(), values[1])
+        tm.assert_almost_equal(mgr.iget(2).internal_values(), values[2])
 
     def test_set(self):
         mgr = create_mgr("a,b,c: int", item_shape=(3,))
 
         mgr.set("d", np.array(["foo"] * 3))
         mgr.set("b", np.array(["bar"] * 3))
-        tm.assert_numpy_array_equal(mgr.get("a").internal_values(), np.array([0] * 3))
+        tm.assert_numpy_array_equal(mgr.iget(0).internal_values(), np.array([0] * 3))
         tm.assert_numpy_array_equal(
-            mgr.get("b").internal_values(), np.array(["bar"] * 3, dtype=np.object_)
+            mgr.iget(1).internal_values(), np.array(["bar"] * 3, dtype=np.object_)
         )
-        tm.assert_numpy_array_equal(mgr.get("c").internal_values(), np.array([2] * 3))
+        tm.assert_numpy_array_equal(mgr.iget(2).internal_values(), np.array([2] * 3))
         tm.assert_numpy_array_equal(
-            mgr.get("d").internal_values(), np.array(["foo"] * 3, dtype=np.object_)
+            mgr.iget(3).internal_values(), np.array(["foo"] * 3, dtype=np.object_)
         )
 
     def test_set_change_dtype(self, mgr):
         mgr.set("baz", np.zeros(N, dtype=bool))
 
         mgr.set("baz", np.repeat("foo", N))
-        assert mgr.get("baz").dtype == np.object_
+        idx = mgr.items.get_loc("baz")
+        assert mgr.iget(idx).dtype == np.object_
 
         mgr2 = mgr.consolidate()
         mgr2.set("baz", np.repeat("foo", N))
-        assert mgr2.get("baz").dtype == np.object_
+        idx = mgr2.items.get_loc("baz")
+        assert mgr2.iget(idx).dtype == np.object_
 
         mgr2.set("quux", tm.randn(N).astype(int))
-        assert mgr2.get("quux").dtype == np.int_
+        idx = mgr2.items.get_loc("quux")
+        assert mgr2.iget(idx).dtype == np.int_
 
         mgr2.set("quux", tm.randn(N))
-        assert mgr2.get("quux").dtype == np.float_
+        assert mgr2.iget(idx).dtype == np.float_
 
     def test_copy(self, mgr):
         cp = mgr.copy(deep=False)
@@ -430,8 +433,8 @@ def test_as_array_datetime(self):
 
     def test_as_array_datetime_tz(self):
         mgr = create_mgr("h: M8[ns, US/Eastern]; g: M8[ns, CET]")
-        assert mgr.get("h").dtype == "datetime64[ns, US/Eastern]"
-        assert mgr.get("g").dtype == "datetime64[ns, CET]"
+        assert mgr.iget(0).dtype == "datetime64[ns, US/Eastern]"
+        assert mgr.iget(1).dtype == "datetime64[ns, CET]"
         assert mgr.as_array().dtype == "object"
 
     @pytest.mark.parametrize("t", ["float16", "float32", "float64", "int32", "int64"])
@@ -441,26 +444,26 @@ def test_astype(self, t):
 
         t = np.dtype(t)
         tmgr = mgr.astype(t)
-        assert tmgr.get("c").dtype.type == t
-        assert tmgr.get("d").dtype.type == t
-        assert tmgr.get("e").dtype.type == t
+        assert tmgr.iget(0).dtype.type == t
+        assert tmgr.iget(1).dtype.type == t
+        assert tmgr.iget(2).dtype.type == t
 
         # mixed
         mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8")
 
         t = np.dtype(t)
         tmgr = mgr.astype(t, errors="ignore")
-        assert tmgr.get("c").dtype.type == t
-        assert tmgr.get("e").dtype.type == t
-        assert tmgr.get("f").dtype.type == t
-        assert tmgr.get("g").dtype.type == t
+        assert tmgr.iget(2).dtype.type == t
+        assert tmgr.iget(4).dtype.type == t
+        assert tmgr.iget(5).dtype.type == t
+        assert tmgr.iget(6).dtype.type == t
 
-        assert tmgr.get("a").dtype.type == np.object_
-        assert tmgr.get("b").dtype.type == np.object_
+        assert tmgr.iget(0).dtype.type == np.object_
+        assert tmgr.iget(1).dtype.type == np.object_
         if t != np.int64:
-            assert tmgr.get("d").dtype.type == np.datetime64
+            assert tmgr.iget(3).dtype.type == np.datetime64
         else:
-            assert tmgr.get("d").dtype.type == t
+            assert tmgr.iget(3).dtype.type == t
 
     def test_convert(self):
         def _compare(old_mgr, new_mgr):
@@ -497,11 +500,11 @@ def _compare(old_mgr, new_mgr):
         mgr.set("b", np.array(["2."] * N, dtype=np.object_))
         mgr.set("foo", np.array(["foo."] * N, dtype=np.object_))
         new_mgr = mgr.convert(numeric=True)
-        assert new_mgr.get("a").dtype == np.int64
-        assert new_mgr.get("b").dtype == np.float64
-        assert new_mgr.get("foo").dtype == np.object_
-        assert new_mgr.get("f").dtype == np.int64
-        assert new_mgr.get("g").dtype == np.float64
+        assert new_mgr.iget(0).dtype == np.int64
+        assert new_mgr.iget(1).dtype == np.float64
+        assert new_mgr.iget(2).dtype == np.object_
+        assert new_mgr.iget(3).dtype == np.int64
+        assert new_mgr.iget(4).dtype == np.float64
 
         mgr = create_mgr(
             "a,b,foo: object; f: i4; bool: bool; dt: datetime; i: i8; g: f8; h: f2"
@@ -510,15 +513,15 @@ def _compare(old_mgr, new_mgr):
         mgr.set("b", np.array(["2."] * N, dtype=np.object_))
         mgr.set("foo", np.array(["foo."] * N, dtype=np.object_))
         new_mgr = mgr.convert(numeric=True)
-        assert new_mgr.get("a").dtype == np.int64
-        assert new_mgr.get("b").dtype == np.float64
-        assert new_mgr.get("foo").dtype == np.object_
-        assert new_mgr.get("f").dtype == np.int32
-        assert new_mgr.get("bool").dtype == np.bool_
-        assert new_mgr.get("dt").dtype.type, np.datetime64
-        assert new_mgr.get("i").dtype == np.int64
-        assert new_mgr.get("g").dtype == np.float64
-        assert new_mgr.get("h").dtype == np.float16
+        assert new_mgr.iget(0).dtype == np.int64
+        assert new_mgr.iget(1).dtype == np.float64
+        assert new_mgr.iget(2).dtype == np.object_
+        assert new_mgr.iget(3).dtype == np.int32
+        assert new_mgr.iget(4).dtype == np.bool_
+        assert new_mgr.iget(5).dtype.type, np.datetime64
+        assert new_mgr.iget(6).dtype == np.int64
+        assert new_mgr.iget(7).dtype == np.float64
+        assert new_mgr.iget(8).dtype == np.float16
 
     def test_invalid_ea_block(self):
         with pytest.raises(AssertionError, match="block.size != values.size"):
@@ -620,16 +623,16 @@ def test_reindex_items(self):
         assert reindexed.nblocks == 2
         tm.assert_index_equal(reindexed.items, pd.Index(["g", "c", "a", "d"]))
         tm.assert_almost_equal(
-            mgr.get("g").internal_values(), reindexed.get("g").internal_values()
+            mgr.iget(6).internal_values(), reindexed.iget(0).internal_values()
         )
         tm.assert_almost_equal(
-            mgr.get("c").internal_values(), reindexed.get("c").internal_values()
+            mgr.iget(2).internal_values(), reindexed.iget(1).internal_values()
         )
         tm.assert_almost_equal(
-            mgr.get("a").internal_values(), reindexed.get("a").internal_values()
+            mgr.iget(0).internal_values(), reindexed.iget(2).internal_values()
         )
         tm.assert_almost_equal(
-            mgr.get("d").internal_values(), reindexed.get("d").internal_values()
+            mgr.iget(3).internal_values(), reindexed.iget(3).internal_values()
         )
 
     def test_get_numeric_data(self):
@@ -645,13 +648,15 @@ def test_get_numeric_data(self):
             numeric.items, pd.Index(["int", "float", "complex", "bool"])
         )
         tm.assert_almost_equal(
-            mgr.get("float").internal_values(), numeric.get("float").internal_values()
+            mgr.iget(mgr.items.get_loc("float")).internal_values(),
+            numeric.iget(numeric.items.get_loc("float")).internal_values(),
         )
 
         # Check sharing
         numeric.set("float", np.array([100.0, 200.0, 300.0]))
         tm.assert_almost_equal(
-            mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0])
+            mgr.iget(mgr.items.get_loc("float")).internal_values(),
+            np.array([100.0, 200.0, 300.0]),
         )
 
         numeric2 = mgr.get_numeric_data(copy=True)
@@ -660,7 +665,8 @@ def test_get_numeric_data(self):
         )
         numeric2.set("float", np.array([1000.0, 2000.0, 3000.0]))
         tm.assert_almost_equal(
-            mgr.get("float").internal_values(), np.array([100.0, 200.0, 300.0])
+            mgr.iget(mgr.items.get_loc("float")).internal_values(),
+            np.array([100.0, 200.0, 300.0]),
         )
 
     def test_get_bool_data(self):
@@ -674,19 +680,22 @@ def test_get_bool_data(self):
         bools = mgr.get_bool_data()
         tm.assert_index_equal(bools.items, pd.Index(["bool"]))
         tm.assert_almost_equal(
-            mgr.get("bool").internal_values(), bools.get("bool").internal_values()
+            mgr.iget(mgr.items.get_loc("bool")).internal_values(),
+            bools.iget(bools.items.get_loc("bool")).internal_values(),
         )
 
         bools.set("bool", np.array([True, False, True]))
         tm.assert_numpy_array_equal(
-            mgr.get("bool").internal_values(), np.array([True, False, True])
+            mgr.iget(mgr.items.get_loc("bool")).internal_values(),
+            np.array([True, False, True]),
         )
 
         # Check sharing
         bools2 = mgr.get_bool_data(copy=True)
         bools2.set("bool", np.array([False, True, False]))
         tm.assert_numpy_array_equal(
-            mgr.get("bool").internal_values(), np.array([True, False, True])
+            mgr.iget(mgr.items.get_loc("bool")).internal_values(),
+            np.array([True, False, True]),
         )
 
     def test_unicode_repr_doesnt_raise(self):
diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
index 03c1445e099a0..035698687cfc2 100644
--- a/pandas/tests/resample/test_resampler_grouper.py
+++ b/pandas/tests/resample/test_resampler_grouper.py
@@ -28,7 +28,15 @@ async def test_tab_complete_ipython6_warning(ip):
     )
     await ip.run_code(code)
 
-    with tm.assert_produces_warning(None):
+    # TODO: remove it when Ipython updates
+    # GH 33567, jedi version raises Deprecation warning in Ipython
+    import jedi
+
+    if jedi.__version__ < "0.17.0":
+        warning = tm.assert_produces_warning(None)
+    else:
+        warning = tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False)
+    with warning:
         with provisionalcompleter("ignore"):
             list(ip.Completer.completions("rs.", 1))
 
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
index 302ca8d1aa43e..a6430b4525d4a 100644
--- a/pandas/tests/series/test_api.py
+++ b/pandas/tests/series/test_api.py
@@ -491,7 +491,18 @@ async def test_tab_complete_warning(self, ip):
 
         code = "import pandas as pd; s = pd.Series()"
         await ip.run_code(code)
-        with tm.assert_produces_warning(None):
+
+        # TODO: remove it when Ipython updates
+        # GH 33567, jedi version raises Deprecation warning in Ipython
+        import jedi
+
+        if jedi.__version__ < "0.17.0":
+            warning = tm.assert_produces_warning(None)
+        else:
+            warning = tm.assert_produces_warning(
+                DeprecationWarning, check_stacklevel=False
+            )
+        with warning:
             with provisionalcompleter("ignore"):
                 list(ip.Completer.completions("s.", 1))
 
diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py
index 17815c437249b..b7bdbde5bac5e 100644
--- a/pandas/util/_decorators.py
+++ b/pandas/util/_decorators.py
@@ -237,15 +237,13 @@ def _format_argument_list(allow_args: Union[List[str], int]):
     elif allow_args == 1:
         return " except for the first argument"
     elif isinstance(allow_args, int):
-        return " except for the first {num_args} arguments".format(num_args=allow_args)
+        return f" except for the first {allow_args} arguments"
     elif len(allow_args) == 1:
-        return " except for the argument '{arg}'".format(arg=allow_args[0])
+        return f" except for the argument '{allow_args[0]}'"
     else:
         last = allow_args[-1]
         args = ", ".join(["'" + x + "'" for x in allow_args[:-1]])
-        return " except for the arguments {args} and '{last}'".format(
-            args=args, last=last
-        )
+        return f" except for the arguments {args} and '{last}'"
 
 
 def deprecate_nonkeyword_arguments(

From df471d8a923502be2fbd0be6394915bcd3722496 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 16 Apr 2020 13:25:45 -0700
Subject: [PATCH 26/31] PERF: always slice when indexing on columns

---
 pandas/_libs/internals.pyx               | 64 ++++++++++++++++++++++--
 pandas/core/internals/managers.py        | 19 +++----
 pandas/tests/internals/test_internals.py | 10 +++-
 3 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index d3d8bead88d08..bc681570fef94 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -196,13 +196,27 @@ cdef class BlockPlacement:
 
         return self._as_slice
 
+    def to_slices(self):
+        """
+        Decompose a BlockPlacement into a list of BlockPlacements, each of
+        which is slice-like.
+
+        Returns
+        -------
+        List[BlockPlacement]
+        """
+        if self.is_slice_like:
+            return [self]
+        slices = indexer_as_slices(self.indexer)
+        return [BlockPlacement(x) for x in slices]
+
 
 cdef slice slice_canonize(slice s):
     """
     Convert slice to canonical bounded form.
     """
     cdef:
-        Py_ssize_t start = 0, stop = 0, step = 1, length
+        Py_ssize_t start = 0, stop = 0, step = 1
 
     if s.step is None:
         step = 1
@@ -234,8 +248,8 @@ cdef slice slice_canonize(slice s):
             if stop > start:
                 stop = start
 
-    if start < 0 or (stop < 0 and s.stop is not None):
-        raise ValueError("unbounded slice")
+    if start < 0 or (stop < 0 and s.stop is not None and step > 0):
+        raise ValueError("unbounded slice", start, stop, step, s)
 
     if stop < 0:
         return slice(start, None, step)
@@ -318,6 +332,50 @@ cdef slice_getitem(slice slc, ind):
         return cnp.PyArray_Arange(s_start, s_stop, s_step, NPY_INT64)[ind]
 
 
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef list indexer_as_slices(int64_t[:] vals):
+    """
+    Convert an indexer to a list of slices.
+    """
+    # TODO: there may be more efficient ways to decompose an indexer into slices
+    cdef:
+        Py_ssize_t i, n, start, stop
+        int64_t d
+
+    if vals is None:
+        raise TypeError("vals must be ndarray")
+
+    n = vals.shape[0]
+
+    if n == 0:
+        return []
+
+    if n == 1:
+        return [slice(vals[0], vals[0] + 1, 1)]
+
+    # n >= 2
+    d = vals[1] - vals[0]
+
+    if d == 0:
+        # i guess we have duplicate values (TODO: how?)
+        return [slice(vals[0], vals[0] + 1, 1)] + indexer_as_slices(vals[1:])
+
+    for i in range(2, n):
+        if vals[i] - vals[i - 1] != d:
+            # we need to start a new slice
+            start = vals[0]
+            stop = vals[i - 1] + d
+            return [slice(start, stop, d)] + indexer_as_slices(vals[i:])
+
+    start = vals[0]
+    stop = start + n * d
+    if stop < 0 and d < 0:
+        return [slice(start, None, d)]
+    else:
+        return [slice(start, stop, d)]
+
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 cdef slice indexer_as_slice(int64_t[:] vals):
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index b57fe76846274..b1eab61fd256b 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1324,7 +1324,6 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default):
         for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=True):
             if blkno == -1:
                 # If we've got here, fill_value was not lib.no_default
-
                 blocks.append(
                     self._make_na_block(placement=mgr_locs, fill_value=fill_value)
                 )
@@ -1343,15 +1342,17 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default):
 
                 else:
                     taker = blklocs[mgr_locs.indexer]
-                    max_len = max(len(mgr_locs), taker.max() + 1)
-                    taker = lib.maybe_indices_to_slice(taker, max_len)
 
-                    if isinstance(taker, slice):
-                        nb = blk.getitem_block(taker, new_mgr_locs=mgr_locs)
-                    else:
-                        # TODO: just use getitem_block anyway?
-                        nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs)
-                    blocks.append(nb)
+                    bp = libinternals.BlockPlacement(taker)
+                    bps = bp.to_slices()
+
+                    left, right = 0, 0
+                    for sub in bps:
+                        right += len(sub)
+                        new_locs = mgr_locs[left:right]
+                        b = blk.getitem_block(sub.indexer, new_mgr_locs=new_locs)
+                        blocks.append(b)
+                        left += len(sub)
 
         return blocks
 
diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index f1d4c865a0ced..14323ca624a6f 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -620,7 +620,8 @@ def test_reindex_items(self):
         mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2")
 
         reindexed = mgr.reindex_axis(["g", "c", "a", "d"], axis=0)
-        assert reindexed.nblocks == 2
+        assert reindexed.nblocks == 3
+        assert all(b.mgr_locs.is_slice_like for b in reindexed)
         tm.assert_index_equal(reindexed.items, pd.Index(["g", "c", "a", "d"]))
         tm.assert_almost_equal(
             mgr.iget(6).internal_values(), reindexed.iget(0).internal_values()
@@ -957,6 +958,13 @@ def test_unbounded_slice_raises(self, slc):
         with pytest.raises(ValueError, match=msg):
             BlockPlacement(slc)
 
+    def test_slice_canonize_negative_stop(self):
+        # GH#???? negative stop is OK with negative step and positive start
+        slc = slice(3, -1, -2)
+
+        bp = BlockPlacement(slc)
+        assert bp.indexer == slice(3, None, -2)
+
     @pytest.mark.parametrize(
         "slc",
         [

From b323ea6d8a011779499f0c1d81fd06d8c4b6d35f Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Thu, 16 Apr 2020 13:58:57 -0700
Subject: [PATCH 27/31] typo fixup

---
 pandas/tests/internals/test_internals.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py
index 14323ca624a6f..5673735d1c70c 100644
--- a/pandas/tests/internals/test_internals.py
+++ b/pandas/tests/internals/test_internals.py
@@ -621,7 +621,7 @@ def test_reindex_items(self):
 
         reindexed = mgr.reindex_axis(["g", "c", "a", "d"], axis=0)
         assert reindexed.nblocks == 3
-        assert all(b.mgr_locs.is_slice_like for b in reindexed)
+        assert all(b.mgr_locs.is_slice_like for b in reindexed.blocks)
         tm.assert_index_equal(reindexed.items, pd.Index(["g", "c", "a", "d"]))
         tm.assert_almost_equal(
             mgr.iget(6).internal_values(), reindexed.iget(0).internal_values()

From 96b7d6a05611c80f8a6999c7fab837f4cba44189 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 20 Apr 2020 17:55:31 -0700
Subject: [PATCH 28/31] avoid take in one more case

---
 pandas/core/internals/managers.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index a650392592091..a5df168fb78c4 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1258,7 +1258,10 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default):
                 if sllen == 0:
                     return []
                 return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
-            elif not allow_fill or self.ndim == 1:
+            elif not allow_fill:
+                return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
+
+            elif self.ndim == 1:
                 if allow_fill and fill_value is None:
                     _, fill_value = maybe_promote(blk.dtype)
 
@@ -1302,6 +1305,7 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_value=lib.no_default):
                     # A non-consolidatable block, it's easy, because there's
                     # only one item and each mgr loc is a copy of that single
                     # item.
+                    # TODO(EA2D): special case unnecessary with 2D EAs
                     for mgr_loc in mgr_locs:
                         newblk = blk.copy(deep=False)
                         newblk.mgr_locs = slice(mgr_loc, mgr_loc + 1)

From 7fad75264c231ae4cb6591b37b636d72cf72abd7 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 24 Jul 2021 12:58:56 -0700
Subject: [PATCH 29/31] update

---
 pandas/core/internals/managers.py      | 33 ++++++++++++++++----------
 pandas/tests/frame/indexing/test_xs.py |  5 ++--
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index a888649d3ed98..6f39924ca14cd 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -711,6 +711,7 @@ def _slice_take_blocks_ax0(
         only_slice : bool, default False
             If True, we always return views on existing arrays, never copies.
             This is used when called from ops.blockwise.operate_blockwise.
+            Ignored; TODO: remove argument.
 
         Returns
         -------
@@ -736,7 +737,7 @@ def _slice_take_blocks_ax0(
                 if allow_fill and fill_value is None:
                     fill_value = blk.fill_value
 
-                if not allow_fill and only_slice:
+                if not allow_fill:
                     # GH#33597 slice instead of take, so we get
                     #  views instead of copies
                     blocks = [
@@ -773,8 +774,7 @@ def _slice_take_blocks_ax0(
         # When filling blknos, make sure blknos is updated before appending to
         # blocks list, that way new blkno is exactly len(blocks).
         blocks = []
-        group = not only_slice
-        for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=group):
+        for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=False):
             if blkno == -1:
                 # If we've got here, fill_value was not lib.no_default
 
@@ -800,24 +800,31 @@ def _slice_take_blocks_ax0(
                     #  we may try to only slice
                     taker = blklocs[mgr_locs.indexer]
                     max_len = max(len(mgr_locs), taker.max() + 1)
-                    if only_slice:
-                        taker = lib.maybe_indices_to_slice(taker, max_len)
+                    taker = lib.maybe_indices_to_slice(taker, max_len)
 
                     if isinstance(taker, slice):
+                        bp = libinternals.BlockPlacement(taker)
+                        bps = bp.to_slices()
+                        assert len(bps) == 1  # just checking
                         nb = blk.getitem_block_columns(taker, new_mgr_locs=mgr_locs)
                         blocks.append(nb)
-                    elif only_slice:
+                    else:
                         # GH#33597 slice instead of take, so we get
                         #  views instead of copies
-                        for i, ml in zip(taker, mgr_locs):
-                            slc = slice(i, i + 1)
-                            bp = BlockPlacement(ml)
-                            nb = blk.getitem_block_columns(slc, new_mgr_locs=bp)
+                        bp = libinternals.BlockPlacement(taker)
+                        bps = bp.to_slices()
+                        left, right = 0, 0
+
+                        for sub in bps:
+                            right += len(sub)
+                            new_locs = mgr_locs[left:right]
+                            # we have isinstance(sub.indexer, slice)
+                            nb = blk.getitem_block_columns(
+                                sub.indexer, new_mgr_locs=new_locs
+                            )
                             # We have np.shares_memory(nb.values, blk.values)
                             blocks.append(nb)
-                    else:
-                        nb = blk.take_nd(taker, axis=0, new_mgr_locs=mgr_locs)
-                        blocks.append(nb)
+                            left += len(sub)
 
         return blocks
 
diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py
index d2704876c31c5..8c7f05d40552d 100644
--- a/pandas/tests/frame/indexing/test_xs.py
+++ b/pandas/tests/frame/indexing/test_xs.py
@@ -374,12 +374,11 @@ def test_xs_droplevel_false_view(self, using_array_manager):
             expected = DataFrame({"a": [1]})
         tm.assert_frame_equal(result, expected)
 
-        # with mixed dataframe, modifying the parent doesn't modify result
-        # TODO the "split" path behaves differently here as with single block
+        # with mixed dataframe, we still get a view on the parent
         df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"]))
         result = df.xs("a", axis=1, drop_level=False)
         df.iloc[0, 0] = 2
-        expected = DataFrame({"a": [1]})
+        expected = DataFrame({"a": [2]})
         tm.assert_frame_equal(result, expected)
 
     def test_xs_list_indexer_droplevel_false(self):

From 201e57da521de7efd12cd526e0263b0fca5d42d7 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 14 Aug 2021 12:07:14 -0700
Subject: [PATCH 30/31] fix last 2 failing tests

---
 pandas/core/frame.py              | 3 +++
 pandas/core/generic.py            | 1 -
 pandas/core/internals/managers.py | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 823de2133f0b3..2310ca52ec9ae 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4558,6 +4558,9 @@ def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
                 columns, method, copy, level, fill_value, limit, tolerance
             )
 
+            # If we have made a copy, no need to make another one
+            copy = False
+
         index = axes["index"]
         if index is not None:
             frame = frame._reindex_index(
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index b2a4fecbf084d..31e5303ccd975 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -4817,7 +4817,6 @@ def reindex(self: FrameOrSeries, *args, **kwargs) -> FrameOrSeries:
             axes, level, limit, tolerance, method, fill_value, copy
         ).__finalize__(self, method="reindex")
 
-    @final
     def _reindex_axes(
         self: FrameOrSeries, axes, level, limit, tolerance, method, fill_value, copy
     ) -> FrameOrSeries:
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 3d9d7b5f5f2a9..87bba084269b6 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -665,6 +665,8 @@ def reindex_indexer(
             new_blocks = self._slice_take_blocks_ax0(
                 indexer, fill_value=fill_value, only_slice=only_slice
             )
+            if copy:
+                new_blocks = [x.copy() for x in new_blocks]
         else:
             new_blocks = [
                 blk.take_nd(

From 69f08cb406ebfcbd2bc6524789ccc271a49f05e0 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 14 Aug 2021 17:03:39 -0700
Subject: [PATCH 31/31] mypy fixup

---
 pandas/_libs/internals.pyi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi
index 3feefe7ac8ff4..b9f9a0948ff33 100644
--- a/pandas/_libs/internals.pyi
+++ b/pandas/_libs/internals.pyi
@@ -42,6 +42,7 @@ class BlockPlacement:
     def __len__(self) -> int: ...
     def delete(self, loc) -> BlockPlacement: ...
     def append(self, others: list[BlockPlacement]) -> BlockPlacement: ...
+    def to_slices(self) -> list[BlockPlacement]: ...
 
 class SharedBlock:
     _mgr_locs: BlockPlacement