From 3ba28431d4791b9dd1b0f1a9a73728af9123b4cc Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sun, 14 Jan 2018 18:03:07 -0500
Subject: [PATCH 1/4] PERF: remove use of Panel & perf in rolling corr/cov

closes #17917
---
 asv_bench/benchmarks/rolling.py | 25 ++++++++++++++++--
 pandas/core/indexes/base.py     |  5 ++++
 pandas/core/window.py           | 47 +++++++++++++++++++++------------
 pandas/tests/test_window.py     | 22 ++++++++-------
 4 files changed, 71 insertions(+), 28 deletions(-)

diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
index 59cf7d090a622..75990d83f8212 100644
--- a/asv_bench/benchmarks/rolling.py
+++ b/asv_bench/benchmarks/rolling.py
@@ -11,8 +11,8 @@ class Methods(object):
               [10, 1000],
               ['int', 'float'],
               ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt',
-               'sum', 'corr', 'cov'])
-    param_names = ['constructor', 'window', 'dtype', 'method']
+               'sum'])
+    param_names = ['constructor', 'window', 'dtype', 'method']
 
     def setup(self, constructor, window, dtype, method):
         N = 10**5
@@ -23,6 +23,27 @@ def time_rolling(self, constructor, window, dtype, method):
         getattr(self.roll, method)()
 
 
+class Pairwise(object):
+
+    sample_time = 0.2
+    params = ([10, 1000, None],
+              ['corr', 'cov'],
+              [True, False])
+    param_names = ['window', 'method', 'pairwise']
+
+    def setup(self, window, method, pairwise):
+        N = 10**4
+        arr = np.random.random(N)
+        self.df = pd.DataFrame(arr)
+
+    def time_pairwise(self, window, method, pairwise):
+        if window is None:
+            r = self.df.expanding()
+        else:
+            r = self.df.rolling(window=window)
+        getattr(r, method)(self.df, pairwise=pairwise)
+
+
 class Quantile(object):
 
     sample_time = 0.2
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 626f3dc86556a..619c8ea6193ee 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -1102,6 +1102,11 @@ def _assert_can_do_op(self, value):
     def nlevels(self):
         return 1
 
+    @property
+    def levels(self):
+        """ return a list my levels """
+        return [list(self)]
+
     def _get_names(self):
         return FrozenList((self.name, ))
 
diff --git a/pandas/core/window.py b/pandas/core/window.py
index 4d6a1de60f59b..3093930578a0e 100644
--- a/pandas/core/window.py
+++ b/pandas/core/window.py
@@ -1863,25 +1863,38 @@ def dataframe_from_int_dict(data, frame_template):
                             results[i][j] = f(*_prep_binary(arg1.iloc[:, i],
                                                             arg2.iloc[:, j]))
 
-                # TODO: not the most efficient (perf-wise)
-                # though not bad code-wise
-                from pandas import Panel, MultiIndex, concat
-
-                with warnings.catch_warnings(record=True):
-                    p = Panel.from_dict(results).swapaxes('items', 'major')
-                    if len(p.major_axis) > 0:
-                        p.major_axis = arg1.columns[p.major_axis]
-                    if len(p.minor_axis) > 0:
-                        p.minor_axis = arg2.columns[p.minor_axis]
-
-                if len(p.items):
+                from pandas import MultiIndex, concat
+
+                result_index = arg1.index.union(arg2.index)
+                if len(result_index):
+
+                    # construct result frame
                     result = concat(
-                        [p.iloc[i].T for i in range(len(p.items))],
-                        keys=p.items)
+                        [concat([results[i][j]
+                                 for j, c in enumerate(arg2.columns)],
+                                ignore_index=True)
+                         for i, c in enumerate(arg1.columns)],
+                        ignore_index=True,
+                        axis=1)
+                    result.columns = arg1.columns
+
+                    # set the index and reorder
+                    if arg2.columns.nlevels > 1:
+                        result.index = MultiIndex.from_product(
+                            arg2.columns.levels + result_index.levels)
+                        result = result.reorder_levels([2, 0, 1]).sort_index()
+                    else:
+                        result.index = MultiIndex.from_product(
+                            [range(len(arg2.columns)),
+                             range(len(result_index))])
+                        result = result.swaplevel(1, 0).sort_index()
+                        result.index = MultiIndex.from_product(
+                            result_index.levels + arg2.columns.levels)
                 else:
 
+                    # empty result
                     result = DataFrame(
-                        index=MultiIndex(levels=[arg1.index, arg1.columns],
+                        index=MultiIndex(levels=[arg1.index, arg2.columns],
                                          labels=[[], []]),
                         columns=arg2.columns,
                         dtype='float64')
@@ -1890,9 +1903,9 @@ def dataframe_from_int_dict(data, frame_template):
                 # reset our column names to arg2 names
                 # careful not to mutate the original names
                 result.columns = result.columns.set_names(
-                    arg2.columns.names)
+                    arg1.columns.names)
                 result.index = result.index.set_names(
-                    arg1.index.names + arg1.columns.names)
+                    result_index.names + arg2.columns.names)
 
                 return result
 
diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py
index 22526d14a7168..dabdb1e8e689c 100644
--- a/pandas/tests/test_window.py
+++ b/pandas/tests/test_window.py
@@ -14,6 +14,7 @@
 import pandas.tseries.offsets as offsets
 from pandas.core.base import SpecificationError
 from pandas.errors import UnsupportedFunctionCall
+from pandas.core.sorting import safe_sort
 import pandas.util.testing as tm
 import pandas.util._test_decorators as td
 from pandas.compat import range, zip
@@ -1645,7 +1646,7 @@ def compare(self, result, expected):
         result = result.dropna().values
         expected = expected.dropna().values
 
-        tm.assert_numpy_array_equal(result, expected)
+        tm.assert_numpy_array_equal(result, expected, check_dtype=False)
 
     @pytest.mark.parametrize('f', [lambda x: x.cov(), lambda x: x.corr()])
     def test_no_flex(self, f):
@@ -1670,15 +1671,19 @@ def test_no_flex(self, f):
     def test_pairwise_with_self(self, f):
 
         # DataFrame with itself, pairwise=True
-        results = [f(df) for df in self.df1s]
-        for (df, result) in zip(self.df1s, results):
+        # note that we may construct the 1st level of the MI
+        # in a non-monotonic way, so compare accordingly
+        results = []
+        for i, df in enumerate(self.df1s):
+            result = f(df)
             tm.assert_index_equal(result.index.levels[0],
                                   df.index,
                                   check_names=False)
-            tm.assert_index_equal(result.index.levels[1],
-                                  df.columns,
-                                  check_names=False)
+            tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
+                                        safe_sort(df.columns.unique()))
             tm.assert_index_equal(result.columns, df.columns)
+            results.append(result)
+
         for i, result in enumerate(results):
             if i > 0:
                 self.compare(result, results[0])
@@ -1716,9 +1721,8 @@ def test_pairwise_with_other(self, f):
             tm.assert_index_equal(result.index.levels[0],
                                   df.index,
                                   check_names=False)
-            tm.assert_index_equal(result.index.levels[1],
-                                  self.df2.columns,
-                                  check_names=False)
+            tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
+                                        safe_sort(self.df2.columns.unique()))
         for i, result in enumerate(results):
             if i > 0:
                 self.compare(result, results[0])

From e4d7ce946162dbcda5ca45e0cfaaf156b77b1423 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Mon, 15 Jan 2018 18:38:32 -0500
Subject: [PATCH 2/4] whatsnew

---
 doc/source/whatsnew/v0.23.0.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 2bd2bb199bf1f..5db29cb76b106 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -383,7 +383,7 @@ Performance Improvements
 - :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`)
 - Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`)
 - Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`)
-
+- Improved performance of pairwise ``.rolling()`` and ``.expanding()`` with ``.cov()`` and ``.corr()`` operations (:issue:`17917`)
 
 .. _whatsnew_0230.docs:
 

From 44b22e00db2ae02abcab90966f03feca9dc9a1a8 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Mon, 15 Jan 2018 20:07:18 -0500
Subject: [PATCH 3/4] fix incorrect usage in pivot

---
 pandas/core/reshape/pivot.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index 0e92fc4edce85..a4c9848dca900 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -99,19 +99,15 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
 
     if not dropna:
         from pandas import MultiIndex
-        try:
+        if table.index.nlevels > 1:
             m = MultiIndex.from_arrays(cartesian_product(table.index.levels),
                                        names=table.index.names)
             table = table.reindex(m, axis=0)
-        except AttributeError:
-            pass  # it's a single level
 
-        try:
+        if table.columns.nlevels > 1:
             m = MultiIndex.from_arrays(cartesian_product(table.columns.levels),
                                        names=table.columns.names)
             table = table.reindex(m, axis=1)
-        except AttributeError:
-            pass  # it's a single level or a series
 
     if isinstance(table, ABCDataFrame):
         table = table.sort_index(axis=1)

From 2e8aaa11a870d68f3759eb506effdbbb891249b8 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Tue, 16 Jan 2018 06:06:47 -0500
Subject: [PATCH 4/4] levels compat

---
 pandas/core/indexes/base.py | 5 -----
 pandas/core/window.py       | 4 ++--
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 619c8ea6193ee..626f3dc86556a 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -1102,11 +1102,6 @@ def _assert_can_do_op(self, value):
     def nlevels(self):
         return 1
 
-    @property
-    def levels(self):
-        """ return a list my levels """
-        return [list(self)]
-
     def _get_names(self):
         return FrozenList((self.name, ))
 
diff --git a/pandas/core/window.py b/pandas/core/window.py
index 3093930578a0e..a3f19ef50459d 100644
--- a/pandas/core/window.py
+++ b/pandas/core/window.py
@@ -1881,7 +1881,7 @@ def dataframe_from_int_dict(data, frame_template):
                     # set the index and reorder
                     if arg2.columns.nlevels > 1:
                         result.index = MultiIndex.from_product(
-                            arg2.columns.levels + result_index.levels)
+                            arg2.columns.levels + [result_index])
                         result = result.reorder_levels([2, 0, 1]).sort_index()
                     else:
                         result.index = MultiIndex.from_product(
@@ -1889,7 +1889,7 @@ def dataframe_from_int_dict(data, frame_template):
                              range(len(result_index))])
                         result = result.swaplevel(1, 0).sort_index()
                         result.index = MultiIndex.from_product(
-                            result_index.levels + arg2.columns.levels)
+                            [result_index] + [arg2.columns])
                 else:
 
                     # empty result