From e988595e40fb98bdb3ca18f3037f647c36f53fa7 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 23 Jun 2020 00:06:39 +0000 Subject: [PATCH 01/10] added test for df.ewm.cov with multiindex --- pandas/tests/window/test_pairwise.py | 32 +++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index bb305e93a3cf1..3172feeaf28da 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas import DataFrame, Series, date_range +from pandas import DataFrame, MultiIndex, Series, date_range import pandas._testing as tm from pandas.core.algorithms import safe_sort @@ -189,3 +189,33 @@ def test_corr_freq_memory_error(self): result = s.rolling("12H").corr(s) expected = Series([np.nan] * 5, index=date_range("2020", periods=5)) tm.assert_series_equal(result, expected) + + def test_cov_mulittindex(self): + # GH 34440 + + # create multiindexed DataFrame + columns = MultiIndex.from_product([["a", "b"], ["x", "y"], [0, 1]]) + index = range(3) + df = DataFrame( + np.random.normal(size=(len(index), len(columns))), + index=index, + columns=columns, + ) + + # flatten index then compute covariance + df_fi = df.copy() + df_fi.columns = ["".join(str(c) for c in col) for col in df_fi.columns.values] + cov_fi = df_fi.ewm(alpha=0.1).cov() + cov_fi.index = [ + "".join(str(symbol) for symbol in row_label) for row_label in cov_fi.index + ] + + # compute covariance matrix then flatten its multtindex + df_mi = df.copy() + cov_mi = df_mi.ewm(alpha=0.1).cov() + cov_mi.columns = ["".join(str(c) for c in col) for col in cov_mi.columns.values] + cov_mi.index = [ + "".join(str(symbol) for symbol in row_label) for row_label in cov_mi.index + ] + + tm.assert_frame_equal(cov_fi, cov_mi) From acba11a03d31bd186bb3e7548c1b3d6b97096332 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 24 Jun 2020 20:11:02 +0000 
Subject: [PATCH 02/10] BUG: fixed _flex_binary_moment for multiindex --- pandas/core/window/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 413fe648903ac..2eaec18e69566 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -179,7 +179,9 @@ def dataframe_from_int_dict(data, frame_template): result.index = MultiIndex.from_product( arg2.columns.levels + [result_index] ) - result = result.reorder_levels([2, 0, 1]).sort_index() + num_levels = len(result.index.levels) + new_order = [num_levels - 1] + list(range(num_levels - 1)) + result = result.reorder_levels(new_order).sort_index() else: result.index = MultiIndex.from_product( [range(len(arg2.columns)), range(len(result_index))] From e743ed09be24aa084c7dbc40167b11ebdd0aa254 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 24 Jun 2020 20:13:53 +0000 Subject: [PATCH 03/10] added reference to GH issue --- pandas/core/window/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index 2eaec18e69566..58e7841d4dde5 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -179,6 +179,7 @@ def dataframe_from_int_dict(data, frame_template): result.index = MultiIndex.from_product( arg2.columns.levels + [result_index] ) + # GH 34440 num_levels = len(result.index.levels) new_order = [num_levels - 1] + list(range(num_levels - 1)) result = result.reorder_levels(new_order).sort_index() From 42bfa99158d7ae22adf07eaa95c28b0b7ac8b312 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 24 Jun 2020 20:17:47 +0000 Subject: [PATCH 04/10] DOC: updated whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 60aa1759958f6..15c4a09f427c9 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -978,6 +978,7 
@@ MultiIndex - Bug in :meth:`MultiIndex.intersection` was not guaranteed to preserve order when ``sort=False``. (:issue:`31325`) - Bug in :meth:`DataFrame.truncate` was dropping :class:`MultiIndex` names. (:issue:`34564`) +- Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`) .. ipython:: python From 3588d892497b5f70c0abaea0adf4a18542bb8b36 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 25 Jun 2020 04:49:32 +0000 Subject: [PATCH 05/10] DOC: moved note to rolling section of whatsnew --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 15c4a09f427c9..6d6a1891a3357 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -978,7 +978,6 @@ MultiIndex - Bug in :meth:`MultiIndex.intersection` was not guaranteed to preserve order when ``sort=False``. (:issue:`31325`) - Bug in :meth:`DataFrame.truncate` was dropping :class:`MultiIndex` names. (:issue:`34564`) -- Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`) .. ipython:: python @@ -1051,6 +1050,7 @@ Groupby/resample/rolling - Bug in :meth:`SeriesGroupBy.agg` where any column name was accepted in the named aggregation of ``SeriesGroupBy`` previously. The behaviour now allows only ``str`` and callables else would raise ``TypeError``. 
(:issue:`34422`) - Bug in :meth:`DataFrame.groupby` lost index, when one of the ``agg`` keys referenced an empty list (:issue:`32580`) - Bug in :meth:`Rolling.apply` where ``center=True`` was ignored when ``engine='numba'`` was specified (:issue:`34784`) +- Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`) Reshaping ^^^^^^^^^ From 032aedbbc1e365b0a9f2d539106f81d3121a26cf Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 25 Jun 2020 07:36:21 +0000 Subject: [PATCH 06/10] changed df to fixed seed, linearly spaced ints --- pandas/tests/window/test_pairwise.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 3172feeaf28da..c2c9d2b5b0d93 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -196,26 +196,15 @@ def test_cov_mulittindex(self): # create multiindexed DataFrame columns = MultiIndex.from_product([["a", "b"], ["x", "y"], [0, 1]]) index = range(3) + len_idx, num_cols = len(index), len(columns) df = DataFrame( - np.random.normal(size=(len(index), len(columns))), + np.arange(len_idx * num_cols).reshape(len_idx, num_cols), index=index, columns=columns, ) + result = df.ewm(alpha=0.1).cov() - # flatten index then compute covariance - df_fi = df.copy() - df_fi.columns = ["".join(str(c) for c in col) for col in df_fi.columns.values] - cov_fi = df_fi.ewm(alpha=0.1).cov() - cov_fi.index = [ - "".join(str(symbol) for symbol in row_label) for row_label in cov_fi.index - ] - - # compute covariance matrix then flatten its multtindex - df_mi = df.copy() - cov_mi = df_mi.ewm(alpha=0.1).cov() - cov_mi.columns = ["".join(str(c) for c in col) for col in cov_mi.columns.values] - cov_mi.index = [ - "".join(str(symbol) for symbol in row_label) for row_label in cov_mi.index - ] + # construct expected covariance df here + expected = ... 
- tm.assert_frame_equal(cov_fi, cov_mi) + tm.assert_frame_equal(result, expected) From 6c23fcd060919a262d541e091b34ab4ba0d89f9e Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 25 Jun 2020 07:38:10 +0000 Subject: [PATCH 07/10] removed extraneous comment --- pandas/tests/window/test_pairwise.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index c2c9d2b5b0d93..ed2ddaf2ef428 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -193,7 +193,6 @@ def test_corr_freq_memory_error(self): def test_cov_mulittindex(self): # GH 34440 - # create multiindexed DataFrame columns = MultiIndex.from_product([["a", "b"], ["x", "y"], [0, 1]]) index = range(3) len_idx, num_cols = len(index), len(columns) From 869d56f7fabae17cc5293752d4d7a3db397d3d9d Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 25 Jun 2020 17:03:18 +0000 Subject: [PATCH 08/10] TST: hardcoded expected df for test_multiindex_cov --- pandas/tests/window/test_pairwise.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index ed2ddaf2ef428..f9679c5956554 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -193,17 +193,24 @@ def test_corr_freq_memory_error(self): def test_cov_mulittindex(self): # GH 34440 - columns = MultiIndex.from_product([["a", "b"], ["x", "y"], [0, 1]]) + # create multiindexed DataFrame + columns = MultiIndex.from_product([["a", "b"], ["x", "y"], ["A", "B"]]) index = range(3) - len_idx, num_cols = len(index), len(columns) - df = DataFrame( - np.arange(len_idx * num_cols).reshape(len_idx, num_cols), + df = DataFrame(np.arange(24).reshape(3, 8), index=index, columns=columns,) + result = df.ewm(alpha=0.1).cov() + + index = MultiIndex.from_product([range(3), ["a", "b"], ["x", "y"], ["A", "B"]]) + columns = 
MultiIndex.from_product([["a", "b"], ["x", "y"], ["A", "B"]]) + expected = DataFrame( + np.vstack( + ( + np.full((8, 8), np.NaN), + np.full((8, 8), 32.000000), + np.full((8, 8), 63.881919), + ) + ), index=index, columns=columns, ) - result = df.ewm(alpha=0.1).cov() - - # construct expected covariance df here - expected = ... tm.assert_frame_equal(result, expected) From 377d235a252f6aadb9e90d8fd4b81d7348ebe11f Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 25 Jun 2020 17:04:48 +0000 Subject: [PATCH 09/10] TST: cleaned up comment + blank line --- pandas/tests/window/test_pairwise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index f9679c5956554..197a35ea3e036 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -193,10 +193,10 @@ def test_corr_freq_memory_error(self): def test_cov_mulittindex(self): # GH 34440 - # create multiindexed DataFrame columns = MultiIndex.from_product([["a", "b"], ["x", "y"], ["A", "B"]]) index = range(3) df = DataFrame(np.arange(24).reshape(3, 8), index=index, columns=columns,) + result = df.ewm(alpha=0.1).cov() index = MultiIndex.from_product([range(3), ["a", "b"], ["x", "y"], ["A", "B"]]) From dc388acc93469c0c0e6f0fb2f8b0a07a29387da4 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 25 Jun 2020 17:07:23 +0000 Subject: [PATCH 10/10] TST: clean up index definition --- pandas/tests/window/test_pairwise.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 197a35ea3e036..e82d4b8cbf770 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -193,14 +193,14 @@ def test_corr_freq_memory_error(self): def test_cov_mulittindex(self): # GH 34440 - columns = MultiIndex.from_product([["a", "b"], ["x", "y"], ["A", "B"]]) + columns = MultiIndex.from_product([list("ab"), 
list("xy"), list("AB")]) index = range(3) df = DataFrame(np.arange(24).reshape(3, 8), index=index, columns=columns,) result = df.ewm(alpha=0.1).cov() - index = MultiIndex.from_product([range(3), ["a", "b"], ["x", "y"], ["A", "B"]]) - columns = MultiIndex.from_product([["a", "b"], ["x", "y"], ["A", "B"]]) + index = MultiIndex.from_product([range(3), list("ab"), list("xy"), list("AB")]) + columns = MultiIndex.from_product([list("ab"), list("xy"), list("AB")]) expected = DataFrame( np.vstack( (