From 33728dee4f4f0026855ca2a9f5e2337de9d2ab5f Mon Sep 17 00:00:00 2001 From: Joshua Bradt Date: Wed, 8 Feb 2017 13:13:14 -0500 Subject: [PATCH 1/8] BUG: Fixed handling of non-list value_vars in melt Closes #15348. --- pandas/core/reshape.py | 2 ++ pandas/tests/test_reshape.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index d6287f17c8387..df30435b83139 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -771,6 +771,8 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, if value_vars is not None: if not isinstance(value_vars, (tuple, list, np.ndarray)): value_vars = [value_vars] + else: + value_vars = list(value_vars) frame = frame.loc[:, id_vars + value_vars] else: frame = frame.copy() diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index ed5ec970ba33c..78ae78c773400 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -56,6 +56,20 @@ def test_value_vars(self): columns=['id1', 'id2', 'variable', 'value']) tm.assert_frame_equal(result4, expected4) + def test_value_vars_types(self): + result_with_tuple = melt(self.df, id_vars=['id1', 'id2'], value_vars=('A', 'B')) + result_with_array = melt(self.df, id_vars=['id1', 'id2'], value_vars=np.array(['A', 'B'])) + + expected = DataFrame({'id1': self.df['id1'].tolist() * 2, + 'id2': self.df['id2'].tolist() * 2, + 'variable': ['A'] * 10 + ['B'] * 10, + 'value': (self.df['A'].tolist() + + self.df['B'].tolist())}, + columns=['id1', 'id2', 'variable', 'value']) + + tm.assert_frame_equal(result_with_tuple, expected) + tm.assert_frame_equal(result_with_array, expected) + def test_custom_var_name(self): result5 = melt(self.df, var_name=self.var_name) self.assertEqual(result5.columns.tolist(), ['var', 'value']) From d4c5da3052ab6f460f404a70b47314e752dfe512 Mon Sep 17 00:00:00 2001 From: Joshua Bradt Date: Wed, 8 Feb 2017 19:29:23 -0500 Subject: [PATCH 2/8] Improved type checking and tests. Added whatsnew note. --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/reshape.py | 4 ++-- pandas/tests/test_reshape.py | 9 ++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 9afcf85c929a7..40ea83b934204 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -517,3 +517,4 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) +- Bug in ``pd.melt`` where passing a tuple value for ``value_vars`` caused a TypeError (:issue:`15348`) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index df30435b83139..53cf43a02ffce 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -761,7 +761,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, """ # TODO: what about the existing index? if id_vars is not None: - if not isinstance(id_vars, (tuple, list, np.ndarray)): + if not is_list_like(id_vars): id_vars = [id_vars] else: id_vars = list(id_vars) @@ -769,7 +769,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, id_vars = [] if value_vars is not None: - if not isinstance(value_vars, (tuple, list, np.ndarray)): + if not is_list_like(value_vars): value_vars = [value_vars] else: value_vars = list(value_vars) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 78ae78c773400..89da4f0300443 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -57,9 +57,6 @@ def test_value_vars(self): tm.assert_frame_equal(result4, expected4) def test_value_vars_types(self): - result_with_tuple = melt(self.df, id_vars=['id1', 'id2'], value_vars=('A', 'B')) - result_with_array = melt(self.df, id_vars=['id1', 'id2'], value_vars=np.array(['A', 'B'])) - expected = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, 'variable': ['A'] * 10 + ['B'] * 10, @@ -67,8 +64,10 @@ def test_value_vars_types(self): self.df['B'].tolist())}, columns=['id1', 'id2', 'variable', 'value']) - tm.assert_frame_equal(result_with_tuple, expected) - tm.assert_frame_equal(result_with_array, expected) + for type_ in (tuple, list, np.array): + result = melt(self.df, id_vars=['id1', 'id2'], + value_vars=type_(('A', 'B'))) + tm.assert_frame_equal(result, expected) def test_custom_var_name(self): result5 = melt(self.df, var_name=self.var_name) From 74062224646846506108291fd9d13223991dc004 Mon Sep 17 00:00:00 2001 From: Joshua Bradt Date: Thu, 9 Feb 2017 09:10:56 -0500 Subject: [PATCH 3/8] Fixed formatting. Added comment with issue number to test. --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/tests/test_reshape.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 40ea83b934204..16c3d51888724 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -517,4 +517,4 @@ Bug Fixes - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`) - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) -- Bug in ``pd.melt`` where passing a tuple value for ``value_vars`` caused a TypeError (:issue:`15348`) +- Bug in ``pd.melt`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 89da4f0300443..0de098f75ec38 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -57,6 +57,7 @@ def test_value_vars(self): tm.assert_frame_equal(result4, expected4) def test_value_vars_types(self): + # GH 15348 expected = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, 'variable': ['A'] * 10 + ['B'] * 10, From 455a3103d1209dfd40882ca60712772199418561 Mon Sep 17 00:00:00 2001 From: Joshua Bradt Date: Thu, 9 Feb 2017 17:14:37 -0500 Subject: [PATCH 4/8] Tested types when using MultiIndex to ensure they are lists. Any non-list type for id_vars or value_vars now raises an error message if the DataFrame has a MultiIndex for its columns. --- pandas/core/reshape.py | 8 ++++++++ pandas/tests/test_reshape.py | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 53cf43a02ffce..53a324fc444ca 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -763,6 +763,10 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, if id_vars is not None: if not is_list_like(id_vars): id_vars = [id_vars] + elif (isinstance(frame.columns, MultiIndex) + and not isinstance(id_vars, list)): + raise TypeError('id_vars must be a list of tuples when columns' + ' are a MultiIndex') else: id_vars = list(id_vars) else: @@ -771,6 +775,10 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, if value_vars is not None: if not is_list_like(value_vars): value_vars = [value_vars] + elif (isinstance(frame.columns, MultiIndex) + and not isinstance(value_vars, list)): + raise TypeError('value_vars must be a list of tuples when' + ' columns are a MultiIndex') else: value_vars = list(value_vars) frame = frame.loc[:, id_vars + value_vars] diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 0de098f75ec38..7794a588ba9d4 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -70,6 +70,24 @@ def test_value_vars_types(self): value_vars=type_(('A', 'B'))) tm.assert_frame_equal(result, expected) + def test_id_and_value_vars_types_with_multiindex(self): + expected = DataFrame({ + ('A', 'a'): self.df1[('A', 'a')], + 'CAP': ['B'] * len(self.df1), + 'low': ['b'] * len(self.df1), + 'value': self.df1[('B', 'b')], + }, columns=[('A', 'a'), 'CAP', 'low', 'value']) + + for id_vars in ([('A', 'a')], ('A', 'a')): + for value_vars in ([('B', 'b')], ('B', 'b')): + if isinstance(id_vars, list) and isinstance(value_vars, list): + result = melt(self.df1, id_vars=id_vars, + value_vars=value_vars) + tm.assert_frame_equal(result, expected) + else: + with self.assertRaisesRegex(TypeError, r'MultiIndex'): + melt(self.df1, id_vars=id_vars, value_vars=value_vars) + def test_custom_var_name(self): result5 = melt(self.df, var_name=self.var_name) self.assertEqual(result5.columns.tolist(), ['var', 'value']) From 129d531a9476939e719ef3bf5ad5061bbd9af822 Mon Sep 17 00:00:00 2001 From: Joshua Bradt Date: Fri, 10 Feb 2017 09:26:18 -0500 Subject: [PATCH 5/8] Moved binary operators to satisfy flake8 --- pandas/core/reshape.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 53a324fc444ca..e487e01c3160a 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -763,8 +763,8 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, if id_vars is not None: if not is_list_like(id_vars): id_vars = [id_vars] - elif (isinstance(frame.columns, MultiIndex) - and not isinstance(id_vars, list)): + elif (isinstance(frame.columns, MultiIndex) and + not isinstance(id_vars, list)): raise TypeError('id_vars must be a list of tuples when columns' ' are a MultiIndex') else: @@ -775,8 +775,8 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, if value_vars is not None: if not is_list_like(value_vars): value_vars = [value_vars] - elif (isinstance(frame.columns, MultiIndex) - and not isinstance(value_vars, list)): + elif (isinstance(frame.columns, MultiIndex) and + not isinstance(value_vars, list)): raise TypeError('value_vars must be a list of tuples when' ' columns are a MultiIndex') else: From 20159c1abad24c92bb0525e9775c8b373b17fa4e Mon Sep 17 00:00:00 2001 From: Joshua Bradt Date: Fri, 10 Feb 2017 09:31:06 -0500 Subject: [PATCH 6/8] Changed exception classes to ValueError. --- pandas/core/reshape.py | 8 ++++---- pandas/tests/test_reshape.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index e487e01c3160a..cc0a8de9a5719 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -765,8 +765,8 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, id_vars = [id_vars] elif (isinstance(frame.columns, MultiIndex) and not isinstance(id_vars, list)): - raise TypeError('id_vars must be a list of tuples when columns' - ' are a MultiIndex') + raise ValueError('id_vars must be a list of tuples when columns' + ' are a MultiIndex') else: id_vars = list(id_vars) else: @@ -777,8 +777,8 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, value_vars = [value_vars] elif (isinstance(frame.columns, MultiIndex) and not isinstance(value_vars, list)): - raise TypeError('value_vars must be a list of tuples when' - ' columns are a MultiIndex') + raise ValueError('value_vars must be a list of tuples when' + ' columns are a MultiIndex') else: value_vars = list(value_vars) frame = frame.loc[:, id_vars + value_vars] diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 7794a588ba9d4..8fecdd920ca52 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -85,7 +85,7 @@ def test_id_and_value_vars_types_with_multiindex(self): value_vars=value_vars) tm.assert_frame_equal(result, expected) else: - with self.assertRaisesRegex(TypeError, r'MultiIndex'): + with self.assertRaisesRegex(ValueError, r'MultiIndex'): melt(self.df1, id_vars=id_vars, value_vars=value_vars) def test_custom_var_name(self): From e9071353a85facc48c6eb2322ee673b6838dd429 Mon Sep 17 00:00:00 2001 From: Joshua Bradt Date: Fri, 10 Feb 2017 10:28:18 -0500 Subject: [PATCH 7/8] Split test into two parts --- pandas/tests/test_reshape.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 8fecdd920ca52..6e24d52559dc4 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -70,7 +70,7 @@ def test_value_vars_types(self): value_vars=type_(('A', 'B'))) tm.assert_frame_equal(result, expected) - def test_id_and_value_vars_types_with_multiindex(self): + def test_vars_work_with_multiindex(self): expected = DataFrame({ ('A', 'a'): self.df1[('A', 'a')], 'CAP': ['B'] * len(self.df1), @@ -78,15 +78,22 @@ def test_id_and_value_vars_types_with_multiindex(self): 'value': self.df1[('B', 'b')], }, columns=[('A', 'a'), 'CAP', 'low', 'value']) - for id_vars in ([('A', 'a')], ('A', 'a')): - for value_vars in ([('B', 'b')], ('B', 'b')): - if isinstance(id_vars, list) and isinstance(value_vars, list): - result = melt(self.df1, id_vars=id_vars, - value_vars=value_vars) - tm.assert_frame_equal(result, expected) - else: - with self.assertRaisesRegex(ValueError, r'MultiIndex'): - melt(self.df1, id_vars=id_vars, value_vars=value_vars) + result = melt(self.df1, id_vars=[('A', 'a')], value_vars=[('B', 'b')]) + tm.assert_frame_equal(result, expected) + + def test_tuple_vars_fail_with_multiindex(self): + # melt should fail with an informative error message if + # the columns have a MultiIndex and a tuple is passed + # for id_vars or value_vars. + tuple_a = ('A', 'a') + list_a = [tuple_a] + tuple_b = ('B', 'b') + list_b = [tuple_b] + + for id_vars, value_vars in ((tuple_a, list_b), (list_a, tuple_b), + (tuple_a, tuple_b)): + with self.assertRaisesRegex(ValueError, r'MultiIndex'): + melt(self.df1, id_vars=id_vars, value_vars=value_vars) def test_custom_var_name(self): result5 = melt(self.df, var_name=self.var_name) From a2f2510734b08c81018b320c46e3c777583713ea Mon Sep 17 00:00:00 2001 From: Joshua Bradt Date: Fri, 10 Feb 2017 13:38:25 -0500 Subject: [PATCH 8/8] Changed to tm.assertRaisesRegexp for Python 2 compat. --- pandas/tests/test_reshape.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 6e24d52559dc4..d587e4ea6a1fa 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -92,7 +92,7 @@ def test_tuple_vars_fail_with_multiindex(self): for id_vars, value_vars in ((tuple_a, list_b), (list_a, tuple_b), (tuple_a, tuple_b)): - with self.assertRaisesRegex(ValueError, r'MultiIndex'): + with tm.assertRaisesRegexp(ValueError, r'MultiIndex'): melt(self.df1, id_vars=id_vars, value_vars=value_vars) def test_custom_var_name(self):