From 4c5eddd63e94bacddb96bf61f81a6a8fcd9c33f0 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 20 Aug 2020 21:19:10 -0700
Subject: [PATCH 1/7] REF: remove unnecesary try/except

---
 pandas/core/groupby/generic.py | 69 ++++++++++++++++------------------
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 166631e69f523..51532a75d2d4a 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -31,7 +31,7 @@
 import numpy as np
 
 from pandas._libs import lib
-from pandas._typing import FrameOrSeries, FrameOrSeriesUnion
+from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion
 from pandas.util._decorators import Appender, Substitution, doc
 
 from pandas.core.dtypes.cast import (
@@ -60,6 +60,7 @@
     validate_func_kwargs,
 )
 import pandas.core.algorithms as algorithms
+from pandas.core.arrays import ExtensionArray
 from pandas.core.base import DataError, SpecificationError
 import pandas.core.common as com
 from pandas.core.construction import create_series_with_explicit_dtype
@@ -1034,32 +1035,31 @@ def _cython_agg_blocks(
 
         no_result = object()
 
-        def cast_result_block(result, block: "Block", how: str) -> "Block":
-            # see if we can cast the block to the desired dtype
+        def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike:
+            # see if we can cast the values to the desired dtype
             # this may not be the original dtype
             assert not isinstance(result, DataFrame)
             assert result is not no_result
 
-            dtype = maybe_cast_result_dtype(block.dtype, how)
+            dtype = maybe_cast_result_dtype(values.dtype, how)
             result = maybe_downcast_numeric(result, dtype)
 
-            if block.is_extension and isinstance(result, np.ndarray):
-                # e.g. block.values was an IntegerArray
-                # (1, N) case can occur if block.values was Categorical
+            if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray):
+                # e.g. values was an IntegerArray
+                # (1, N) case can occur if values was Categorical
                 #  and result is ndarray[object]
                 # TODO(EA2D): special casing not needed with 2D EAs
                 assert result.ndim == 1 or result.shape[0] == 1
                 try:
                     # Cast back if feasible
-                    result = type(block.values)._from_sequence(
-                        result.ravel(), dtype=block.values.dtype
+                    result = type(values)._from_sequence(
+                        result.ravel(), dtype=values.dtype
                     )
                 except (ValueError, TypeError):
                     # reshape to be valid for non-Extension Block
                     result = result.reshape(1, -1)
 
-            agg_block: "Block" = block.make_block(result)
-            return agg_block
+            return result
 
         def blk_func(block: "Block") -> List["Block"]:
             new_blocks: List["Block"] = []
@@ -1093,33 +1093,30 @@ def blk_func(block: "Block") -> List["Block"]:
                 # Categoricals. This will done by later self._reindex_output()
                 # Doing it here creates an error. See GH#34951
                 sgb = get_groupby(obj, self.grouper, observed=True)
-                try:
-                    result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
-                except TypeError:
-                    # we may have an exception in trying to aggregate
-                    # continue and exclude the block
-                    raise
+                result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
+
+                result = cast(DataFrame, result)
+                # unwrap DataFrame to get array
+                if len(result._mgr.blocks) != 1:
+                    # We've split an object block! Everything we've assumed
+                    # about a single block input returning a single block output
+                    # is a lie. To keep the code-path for the typical non-split case
+                    # clean, we choose to clean up this mess later on.
+                    assert len(locs) == result.shape[1]
+                    for i, loc in enumerate(locs):
+                        agg_block = result.iloc[:, [i]]._mgr.blocks[0]
+                        agg_block.mgr_locs = [loc]
+                        new_blocks.append(agg_block)
                 else:
-                    result = cast(DataFrame, result)
-                    # unwrap DataFrame to get array
-                    if len(result._mgr.blocks) != 1:
-                        # We've split an object block! Everything we've assumed
-                        # about a single block input returning a single block output
-                        # is a lie. To keep the code-path for the typical non-split case
-                        # clean, we choose to clean up this mess later on.
-                        assert len(locs) == result.shape[1]
-                        for i, loc in enumerate(locs):
-                            agg_block = result.iloc[:, [i]]._mgr.blocks[0]
-                            agg_block.mgr_locs = [loc]
-                            new_blocks.append(agg_block)
-                    else:
-                        result = result._mgr.blocks[0].values
-                        if isinstance(result, np.ndarray) and result.ndim == 1:
-                            result = result.reshape(1, -1)
-                        agg_block = cast_result_block(result, block, how)
-                        new_blocks = [agg_block]
+                    result = result._mgr.blocks[0].values
+                    if isinstance(result, np.ndarray) and result.ndim == 1:
+                        result = result.reshape(1, -1)
+                    res_values = cast_agg_result(result, block.values, how)
+                    agg_block = block.make_block(res_values)
+                    new_blocks = [agg_block]
             else:
-                agg_block = cast_result_block(result, block, how)
+                res_values = cast_agg_result(result, block.values, how)
+                agg_block = block.make_block(res_values)
                 new_blocks = [agg_block]
             return new_blocks
 

From 42649fbb855a895ee5818d7dc80bdbd0ce0e9f5a Mon Sep 17 00:00:00 2001
From: Karthik Mathur <22126205+mathurk1@users.noreply.github.com>
Date: Fri, 21 Aug 2020 17:34:51 -0500
Subject: [PATCH 2/7] TST: add test for agg on ordered categorical cols
 (#35630)

---
 .../tests/groupby/aggregate/test_aggregate.py | 79 +++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index ce9d4b892d775..8fe450fe6abfc 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -1063,6 +1063,85 @@ def test_groupby_get_by_index():
     pd.testing.assert_frame_equal(res, expected)
 
 
+@pytest.mark.parametrize(
+    "grp_col_dict, exp_data",
+    [
+        ({"nr": "min", "cat_ord": "min"}, {"nr": [1, 5], "cat_ord": ["a", "c"]}),
+        ({"cat_ord": "min"}, {"cat_ord": ["a", "c"]}),
+        ({"nr": "min"}, {"nr": [1, 5]}),
+    ],
+)
+def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data):
+    # test single aggregations on ordered categorical cols GHGH27800
+
+    # create the result dataframe
+    input_df = pd.DataFrame(
+        {
+            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
+            "cat_ord": list("aabbccdd"),
+            "cat": list("aaaabbbb"),
+        }
+    )
+
+    input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
+    input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
+    result_df = input_df.groupby("cat").agg(grp_col_dict)
+
+    # create expected dataframe
+    cat_index = pd.CategoricalIndex(
+        ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category"
+    )
+
+    expected_df = pd.DataFrame(data=exp_data, index=cat_index)
+
+    tm.assert_frame_equal(result_df, expected_df)
+
+
+@pytest.mark.parametrize(
+    "grp_col_dict, exp_data",
+    [
+        ({"nr": ["min", "max"], "cat_ord": "min"}, [(1, 4, "a"), (5, 8, "c")]),
+        ({"nr": "min", "cat_ord": ["min", "max"]}, [(1, "a", "b"), (5, "c", "d")]),
+        ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]),
+    ],
+)
+def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
+    # test combined aggregations on ordered categorical cols GH27800
+
+    # create the result dataframe
+    input_df = pd.DataFrame(
+        {
+            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
+            "cat_ord": list("aabbccdd"),
+            "cat": list("aaaabbbb"),
+        }
+    )
+
+    input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
+    input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
+    result_df = input_df.groupby("cat").agg(grp_col_dict)
+
+    # create expected dataframe
+    cat_index = pd.CategoricalIndex(
+        ["a", "b"], categories=["a", "b"], ordered=False, name="cat", dtype="category"
+    )
+
+    # unpack the grp_col_dict to create the multi-index tuple
+    # this tuple will be used to create the expected dataframe index
+    multi_index_list = []
+    for k, v in grp_col_dict.items():
+        if isinstance(v, list):
+            for value in v:
+                multi_index_list.append([k, value])
+        else:
+            multi_index_list.append([k, v])
+    multi_index = pd.MultiIndex.from_tuples(tuple(multi_index_list))
+
+    expected_df = pd.DataFrame(data=exp_data, columns=multi_index, index=cat_index)
+
+    tm.assert_frame_equal(result_df, expected_df)
+
+
 def test_nonagg_agg():
     # GH 35490 - Single/Multiple agg of non-agg function give same results
     # TODO: agg should raise for functions that don't aggregate

From 47121ddc1c655f428c6c3fcea8fbf02eba85600a Mon Sep 17 00:00:00 2001
From: tkmz-n <60312218+tkmz-n@users.noreply.github.com>
Date: Sat, 22 Aug 2020 07:42:50 +0900
Subject: [PATCH 3/7] TST: resample does not yield empty groups (#10603)
 (#35799)

---
 pandas/tests/resample/test_timedelta.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py
index 0fbb60c176b30..3fa85e62d028c 100644
--- a/pandas/tests/resample/test_timedelta.py
+++ b/pandas/tests/resample/test_timedelta.py
@@ -150,3 +150,18 @@ def test_resample_timedelta_edge_case(start, end, freq, resample_freq):
     tm.assert_index_equal(result.index, expected_index)
     assert result.index.freq == expected_index.freq
     assert not np.isnan(result[-1])
+
+
+def test_resample_with_timedelta_yields_no_empty_groups():
+    # GH 10603
+    df = pd.DataFrame(
+        np.random.normal(size=(10000, 4)),
+        index=pd.timedelta_range(start="0s", periods=10000, freq="3906250n"),
+    )
+    result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x))
+
+    expected = pd.DataFrame(
+        [[768.0] * 4] * 12 + [[528.0] * 4],
+        index=pd.timedelta_range(start="1s", periods=13, freq="3s"),
+    )
+    tm.assert_frame_equal(result, expected)

From 1decb3e0ee1923a29b8eded7507bcb783b3870d0 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 21 Aug 2020 18:48:02 -0700
Subject: [PATCH 4/7] revert accidental rebase

---
 pandas/core/groupby/generic.py | 61 ++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 4b1f6cfe0a662..60e23b14eaf09 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -30,7 +30,7 @@
 import numpy as np
 
 from pandas._libs import lib
-from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion
+from pandas._typing import FrameOrSeries, FrameOrSeriesUnion
 from pandas.util._decorators import Appender, Substitution, doc
 
 from pandas.core.dtypes.cast import (
@@ -59,7 +59,6 @@
     validate_func_kwargs,
 )
 import pandas.core.algorithms as algorithms
-from pandas.core.arrays import ExtensionArray
 from pandas.core.base import DataError, SpecificationError
 import pandas.core.common as com
 from pandas.core.construction import create_series_with_explicit_dtype
@@ -1034,31 +1033,32 @@ def _cython_agg_blocks(
 
         no_result = object()
 
-        def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike:
-            # see if we can cast the values to the desired dtype
+        def cast_result_block(result, block: "Block", how: str) -> "Block":
+            # see if we can cast the block to the desired dtype
             # this may not be the original dtype
             assert not isinstance(result, DataFrame)
             assert result is not no_result
 
-            dtype = maybe_cast_result_dtype(values.dtype, how)
+            dtype = maybe_cast_result_dtype(block.dtype, how)
             result = maybe_downcast_numeric(result, dtype)
 
-            if isinstance(values, ExtensionArray) and isinstance(result, np.ndarray):
-                # e.g. values was an IntegerArray
-                # (1, N) case can occur if values was Categorical
+            if block.is_extension and isinstance(result, np.ndarray):
+                # e.g. block.values was an IntegerArray
+                # (1, N) case can occur if block.values was Categorical
                 #  and result is ndarray[object]
                 # TODO(EA2D): special casing not needed with 2D EAs
                 assert result.ndim == 1 or result.shape[0] == 1
                 try:
                     # Cast back if feasible
-                    result = type(values)._from_sequence(
-                        result.ravel(), dtype=values.dtype
+                    result = type(block.values)._from_sequence(
+                        result.ravel(), dtype=block.values.dtype
                     )
                 except (ValueError, TypeError):
                     # reshape to be valid for non-Extension Block
                     result = result.reshape(1, -1)
 
-            return result
+            agg_block: "Block" = block.make_block(result)
+            return agg_block
 
         def blk_func(block: "Block") -> List["Block"]:
             new_blocks: List["Block"] = []
@@ -1092,25 +1092,28 @@ def blk_func(block: "Block") -> List["Block"]:
                 # Categoricals. This will done by later self._reindex_output()
                 # Doing it here creates an error. See GH#34951
                 sgb = get_groupby(obj, self.grouper, observed=True)
-                result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
-
-                assert isinstance(result, (Series, DataFrame))  # for mypy
-                # In the case of object dtype block, it may have been split
-                #  in the operation.  We un-split here.
-                result = result._consolidate()
-                assert isinstance(result, (Series, DataFrame))  # for mypy
-                assert len(result._mgr.blocks) == 1
-
-                # unwrap DataFrame to get array
-                result = result._mgr.blocks[0].values
-                if isinstance(result, np.ndarray) and result.ndim == 1:
-                    result = result.reshape(1, -1)
-                res_values = cast_agg_result(result, block.values, how)
-                agg_block = block.make_block(res_values)
-                new_blocks = [agg_block]
+                try:
+                    result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
+                except TypeError:
+                    # we may have an exception in trying to aggregate
+                    # continue and exclude the block
+                    raise
+                else:
+                    assert isinstance(result, (Series, DataFrame))  # for mypy
+                    # In the case of object dtype block, it may have been split
+                    #  in the operation.  We un-split here.
+                    result = result._consolidate()
+                    assert isinstance(result, (Series, DataFrame))  # for mypy
+                    assert len(result._mgr.blocks) == 1
+
+                    # unwrap DataFrame to get array
+                    result = result._mgr.blocks[0].values
+                    if isinstance(result, np.ndarray) and result.ndim == 1:
+                        result = result.reshape(1, -1)
+                    agg_block = cast_result_block(result, block, how)
+                    new_blocks = [agg_block]
             else:
-                res_values = cast_agg_result(result, block.values, how)
-                agg_block = block.make_block(res_values)
+                agg_block = cast_result_block(result, block, how)
                 new_blocks = [agg_block]
             return new_blocks
 

From 0ab182129d192a03e69ecefe849b2fb1166d05f8 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 24 Aug 2020 18:12:27 -0700
Subject: [PATCH 5/7] BUG: item_cache invalidation in get_numeric_data

---
 pandas/core/internals/managers.py           |  1 -
 pandas/tests/frame/methods/test_cov_corr.py | 17 +++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index f05d4cf1c4be6..d611a72b83204 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -731,7 +731,6 @@ def get_numeric_data(self, copy: bool = False) -> "BlockManager":
         copy : bool, default False
             Whether to copy the blocks
         """
-        self._consolidate_inplace()
         return self._combine([b for b in self.blocks if b.is_numeric], copy)
 
     def _combine(self, blocks: List[Block], copy: bool = True) -> "BlockManager":
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index d3548b639572d..f307acd8c2178 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -191,6 +191,23 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method):
         expected = pd.DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"])
         tm.assert_frame_equal(result, expected)
 
+    def test_corr_item_cache(self):
+        # Check that corr does not lead to incorrect entries in item_cache
+
+        df = pd.DataFrame({"A": range(10)})
+        df["B"] = range(10)[::-1]
+
+        ser = df["A"]  # populate item_cache
+        assert len(df._mgr.blocks) == 2
+
+        _ = df.corr()
+
+        # Check that the corr didnt break link between ser and df
+        ser.values[0] = 99
+        assert df.loc[0, "A"] == 99
+        assert df["A"] is ser
+        assert df.values[0, 0] == 99
+
 
 class TestDataFrameCorrWith:
     def test_corrwith(self, datetime_frame):

From 3778d1faa2cc497ccac31f91a9bb91f3b4da79b2 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 26 Aug 2020 11:10:03 -0700
Subject: [PATCH 6/7] whatsnew

---
 doc/source/whatsnew/v1.1.2.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst
index 7739a483e3d38..8bc54e6da4436 100644
--- a/doc/source/whatsnew/v1.1.2.rst
+++ b/doc/source/whatsnew/v1.1.2.rst
@@ -28,7 +28,7 @@ Bug fixes
 - Bug in :meth:`DataFrame.eval` with ``object`` dtype column binary operations (:issue:`35794`)
 - Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
 - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should bw ``""`` (:issue:`35712`)
--
+- Bug in :meth:`DataFrame.corr` causing subsequent indexing lookups to be incorrect (:issue:`35882`)
 
 .. ---------------------------------------------------------------------------
 

From f31f1154a38a9a432328ab6b416bc42f40b8a158 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 26 Aug 2020 19:07:05 -0700
Subject: [PATCH 7/7] dummy commit to force Travis