From 165fd722cb51e53dc7917d2cc62f3ec173983573 Mon Sep 17 00:00:00 2001 From: steveya Date: Mon, 7 Sep 2020 17:50:02 +0800 Subject: [PATCH 01/23] BUG: GH36113 --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/reshape/reshape.py | 2 +- pandas/tests/frame/test_reshape.py | 12 ++++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 9a778acba4764..d28bbdedec8e8 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -322,7 +322,7 @@ Reshaping - Bug in :meth:`DataFrame.pivot_table` with ``aggfunc='count'`` or ``aggfunc='sum'`` returning ``NaN`` for missing categories when pivoted on a ``Categorical``. Now returning ``0`` (:issue:`31422`) - Bug in :func:`union_indexes` where input index names are not preserved in some cases. Affects :func:`concat` and :class:`DataFrame` constructor (:issue:`13475`) - Bug in func :meth:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`) -- +- Bug in :meth:`DataFrame.stack` for empty DataFrame (:issue:`36113`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index e81dd8f0c735c..ed5de5332c299 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -517,7 +517,7 @@ def factorize(index): # For homogeneous EAs, frame._values will coerce to object. So # we concatenate instead. 
dtypes = list(frame.dtypes._values) - dtype = dtypes[0] + dtype = dtypes[0] if len(dtypes) > 0 else object if is_extension_array_dtype(dtype): arr = dtype.construct_array_type() diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index d80ebaa09b6a8..c2eb6ba5b8eac 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1273,6 +1273,18 @@ def test_stack_timezone_aware_values(): tm.assert_series_equal(result, expected) +def test_stack_empty_frame(): + tm.assert_series_equal( + DataFrame().stack(), Series(index=MultiIndex([[], []], [[], []])) + ) + tm.assert_series_equal( + DataFrame().stack(dropna=True), Series(index=MultiIndex([[], []], [[], []])) + ) + tm.assert_frame_equal( + DataFrame().stack().unstack(), DataFrame() + ) + + def test_unstacking_multi_index_df(): # see gh-30740 df = DataFrame( From e0c1a8d0ee474e67e00338dd7ef262e098422436 Mon Sep 17 00:00:00 2001 From: steveya Date: Mon, 7 Sep 2020 23:30:50 +0800 Subject: [PATCH 02/23] modify tests to avoid deprrecated errors --- pandas/tests/frame/test_reshape.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index c2eb6ba5b8eac..4d0a9cd780b56 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1275,14 +1275,12 @@ def test_stack_timezone_aware_values(): def test_stack_empty_frame(): tm.assert_series_equal( - DataFrame().stack(), Series(index=MultiIndex([[], []], [[], []])) + DataFrame().stack(), Series(index=MultiIndex([[], []], [[], []]), dtype=object) ) tm.assert_series_equal( - DataFrame().stack(dropna=True), Series(index=MultiIndex([[], []], [[], []])) - ) - tm.assert_frame_equal( - DataFrame().stack().unstack(), DataFrame() + DataFrame().stack(dropna=True), Series(index=MultiIndex([[], []], [[], []]), dtype=object) ) + tm.assert_frame_equal(DataFrame().stack().unstack(), DataFrame()) def 
test_unstacking_multi_index_df(): From f765acf95947d896916dd4294540df3b4687ac23 Mon Sep 17 00:00:00 2001 From: steveya Date: Mon, 7 Sep 2020 23:32:26 +0800 Subject: [PATCH 03/23] PEP 8 compliant --- pandas/tests/frame/test_reshape.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 4d0a9cd780b56..54f36fd350a3e 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1275,10 +1275,12 @@ def test_stack_timezone_aware_values(): def test_stack_empty_frame(): tm.assert_series_equal( - DataFrame().stack(), Series(index=MultiIndex([[], []], [[], []]), dtype=object) + DataFrame().stack(), + Series(index=MultiIndex([[], []], [[], []]), dtype=object) ) tm.assert_series_equal( - DataFrame().stack(dropna=True), Series(index=MultiIndex([[], []], [[], []]), dtype=object) + DataFrame().stack(dropna=True), + Series(index=MultiIndex([[], []], [[], []]), dtype=object) ) tm.assert_frame_equal(DataFrame().stack().unstack(), DataFrame()) From 109d3120ec713c9f5c77704ba2c3a72d10d5671c Mon Sep 17 00:00:00 2001 From: steveya Date: Tue, 8 Sep 2020 00:05:28 +0800 Subject: [PATCH 04/23] remove trailing white space --- pandas/tests/frame/test_reshape.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 54f36fd350a3e..fa631a8365703 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1275,11 +1275,11 @@ def test_stack_timezone_aware_values(): def test_stack_empty_frame(): tm.assert_series_equal( - DataFrame().stack(), + DataFrame().stack(), Series(index=MultiIndex([[], []], [[], []]), dtype=object) ) tm.assert_series_equal( - DataFrame().stack(dropna=True), + DataFrame().stack(dropna=True), Series(index=MultiIndex([[], []], [[], []]), dtype=object) ) tm.assert_frame_equal(DataFrame().stack().unstack(), DataFrame()) From 
519a140564923029c01b011d2a4198de107ec78e Mon Sep 17 00:00:00 2001 From: steveya Date: Tue, 8 Sep 2020 00:47:29 +0800 Subject: [PATCH 05/23] black format checked --- pandas/tests/frame/test_reshape.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index fa631a8365703..95ae3b887a5dc 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1275,12 +1275,11 @@ def test_stack_timezone_aware_values(): def test_stack_empty_frame(): tm.assert_series_equal( - DataFrame().stack(), - Series(index=MultiIndex([[], []], [[], []]), dtype=object) + DataFrame().stack(), Series(index=MultiIndex([[], []], [[], []]), dtype=object) ) tm.assert_series_equal( DataFrame().stack(dropna=True), - Series(index=MultiIndex([[], []], [[], []]), dtype=object) + Series(index=MultiIndex([[], []], [[], []]), dtype=object), ) tm.assert_frame_equal(DataFrame().stack().unstack(), DataFrame()) From 9d20ff53320b0e22a4b1fa40c0d4809ca9cf104a Mon Sep 17 00:00:00 2001 From: steveya Date: Wed, 9 Sep 2020 14:15:19 +0800 Subject: [PATCH 06/23] DataFrame().stack should return an empty Series with dtype np.float64 instead of object --- pandas/tests/frame/test_reshape.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 95ae3b887a5dc..4e4bae5b82c0d 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1275,11 +1275,11 @@ def test_stack_timezone_aware_values(): def test_stack_empty_frame(): tm.assert_series_equal( - DataFrame().stack(), Series(index=MultiIndex([[], []], [[], []]), dtype=object) + DataFrame().stack(), Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64) ) tm.assert_series_equal( DataFrame().stack(dropna=True), - Series(index=MultiIndex([[], []], [[], []]), dtype=object), + Series(index=MultiIndex([[], []], [[], []]), 
dtype=np.float64), ) tm.assert_frame_equal(DataFrame().stack().unstack(), DataFrame()) From d460db6da6e3409714e568108e6952da5a7f03ef Mon Sep 17 00:00:00 2001 From: steveya Date: Wed, 9 Sep 2020 14:50:11 +0800 Subject: [PATCH 07/23] PEP8 again. --- pandas/tests/frame/test_reshape.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 4e4bae5b82c0d..783e02427e0b4 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1275,7 +1275,8 @@ def test_stack_timezone_aware_values(): def test_stack_empty_frame(): tm.assert_series_equal( - DataFrame().stack(), Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64) + DataFrame().stack(), + Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64) ) tm.assert_series_equal( DataFrame().stack(dropna=True), From bae2bd8808c96a7d397ec09bc3245a17d25f2314 Mon Sep 17 00:00:00 2001 From: steveya Date: Wed, 9 Sep 2020 17:06:57 +0800 Subject: [PATCH 08/23] remove trailing space...\ --- pandas/tests/frame/test_reshape.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 783e02427e0b4..835180fd9e18e 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1275,7 +1275,7 @@ def test_stack_timezone_aware_values(): def test_stack_empty_frame(): tm.assert_series_equal( - DataFrame().stack(), + DataFrame().stack(), Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64) ) tm.assert_series_equal( From 047ae40f50499d1054ac0370210b01baf3115cf6 Mon Sep 17 00:00:00 2001 From: steveya Date: Wed, 9 Sep 2020 17:44:39 +0800 Subject: [PATCH 09/23] add a comma to pass black lint --- pandas/tests/frame/test_reshape.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 
835180fd9e18e..6b25cbd063828 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1276,7 +1276,7 @@ def test_stack_timezone_aware_values(): def test_stack_empty_frame(): tm.assert_series_equal( DataFrame().stack(), - Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64) + Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64), ) tm.assert_series_equal( DataFrame().stack(dropna=True), From c0fffe8c6bc01d192ee30399afe50793b5b5a038 Mon Sep 17 00:00:00 2001 From: steveya Date: Wed, 9 Sep 2020 21:23:36 +0800 Subject: [PATCH 10/23] simply fixes and parameterize tests --- pandas/core/reshape/reshape.py | 4 ++-- pandas/tests/frame/test_reshape.py | 25 +++++++++++++++---------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index ed5de5332c299..3f482064cdea2 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -513,11 +513,11 @@ def factorize(index): verify_integrity=False, ) - if frame._is_homogeneous_type: + if not frame.empty and frame._is_homogeneous_type: # For homogeneous EAs, frame._values will coerce to object. So # we concatenate instead. 
dtypes = list(frame.dtypes._values) - dtype = dtypes[0] if len(dtypes) > 0 else object + dtype = dtypes[0] if is_extension_array_dtype(dtype): arr = dtype.construct_array_type() diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 6b25cbd063828..e86731e5aa599 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1273,16 +1273,21 @@ def test_stack_timezone_aware_values(): tm.assert_series_equal(result, expected) -def test_stack_empty_frame(): - tm.assert_series_equal( - DataFrame().stack(), - Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64), - ) - tm.assert_series_equal( - DataFrame().stack(dropna=True), - Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64), - ) - tm.assert_frame_equal(DataFrame().stack().unstack(), DataFrame()) +@pytest.mark.parametrize("dropna", [True, False]) +def test_stack_empty_frame(dropna): + # GH 36113 + expected = Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64) + result = DataFrame().stack(dropna=dropna) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +@pytest.mark.parametrize("fill_value", [None, 0]) +def test_stack_unstack_empty_frame(dropna, fill_value): + # GH 36113 + expected = DataFrame() + result = DataFrame().stack(dropna=dropna).unstack(fill_value=fill_value) + tm.assert_frame_equal(result, expected) def test_unstacking_multi_index_df(): From 6b2b9bdedafe0fc5ac196fc5d0af2e202e267731 Mon Sep 17 00:00:00 2001 From: steveya Date: Sat, 12 Sep 2020 22:52:14 +0800 Subject: [PATCH 11/23] add error messages when unstack frame and series with single level index --- pandas/core/reshape/reshape.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 3f482064cdea2..c71afc28f0325 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -399,6 +399,12 @@ def 
_unstack_multiple(data, clocs, fill_value=None): def unstack(obj, level, fill_value=None): + # GH 36113 + # Give nicer error messages when unstack a Index that is not + # a MultiIndex. + if not isinstance(obj.index, MultiIndex): + raise ValueError("index must be a MultiIndex to unstack") + if isinstance(level, (tuple, list)): if len(level) != 1: # _unstack_multiple only handles MultiIndexes, @@ -414,8 +420,13 @@ def unstack(obj, level, fill_value=None): if isinstance(obj, DataFrame): if isinstance(obj.index, MultiIndex): return _unstack_frame(obj, level, fill_value=fill_value) - else: + elif isinstance(obj.columns, MultiIndex): return obj.T.stack(dropna=False) + else: + raise ValueError( + "either index or column of a DataFrame need to " + "be a MultiIndex to unstack." + ) else: if is_extension_array_dtype(obj.dtype): return _unstack_extension_series(obj, level, fill_value) From efc060325b50ec31ee0e1b16a784dcbeb4b7016e Mon Sep 17 00:00:00 2001 From: steveya Date: Sat, 12 Sep 2020 23:25:41 +0800 Subject: [PATCH 12/23] apply ValueError location --- pandas/core/reshape/reshape.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index c71afc28f0325..80d1633c3b4e2 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -399,11 +399,6 @@ def _unstack_multiple(data, clocs, fill_value=None): def unstack(obj, level, fill_value=None): - # GH 36113 - # Give nicer error messages when unstack a Index that is not - # a MultiIndex. - if not isinstance(obj.index, MultiIndex): - raise ValueError("index must be a MultiIndex to unstack") if isinstance(level, (tuple, list)): if len(level) != 1: @@ -423,11 +418,20 @@ def unstack(obj, level, fill_value=None): elif isinstance(obj.columns, MultiIndex): return obj.T.stack(dropna=False) else: + # GH 36113 + # Give nicer error messages when unstack a Index that is not + # a MultiIndex. 
raise ValueError( "either index or column of a DataFrame need to " "be a MultiIndex to unstack." ) else: + # GH 36113 + # Give nicer error messages when unstack a Index that is not + # a MultiIndex. + if not isinstance(obj.index, MultiIndex): + raise ValueError("index must be a MultiIndex to unstack") + if is_extension_array_dtype(obj.dtype): return _unstack_extension_series(obj, level, fill_value) unstacker = _Unstacker( From dac5f3238c7614307b4454aecad6c107fc790ee4 Mon Sep 17 00:00:00 2001 From: steveya Date: Sun, 13 Sep 2020 18:33:55 +0800 Subject: [PATCH 13/23] change the place where error is raised --- pandas/core/reshape/reshape.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 80d1633c3b4e2..80889cccf71d7 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -415,23 +415,14 @@ def unstack(obj, level, fill_value=None): if isinstance(obj, DataFrame): if isinstance(obj.index, MultiIndex): return _unstack_frame(obj, level, fill_value=fill_value) - elif isinstance(obj.columns, MultiIndex): - return obj.T.stack(dropna=False) else: - # GH 36113 - # Give nicer error messages when unstack a Index that is not - # a MultiIndex. - raise ValueError( - "either index or column of a DataFrame need to " - "be a MultiIndex to unstack." - ) - else: + return obj.T.stack(dropna=False) + elif not isinstance(obj.index, MultiIndex): # GH 36113 - # Give nicer error messages when unstack a Index that is not - # a MultiIndex. - if not isinstance(obj.index, MultiIndex): - raise ValueError("index must be a MultiIndex to unstack") - + # Give nicer error messages when unstack a Series whose + # Index is not a MultiIndex. 
+ raise ValueError("index must be a MultiIndex to unstack") + else: if is_extension_array_dtype(obj.dtype): return _unstack_extension_series(obj, level, fill_value) unstacker = _Unstacker( From 6524a6c0286d9ebd3f1edaeb9fc0eaa021c71c65 Mon Sep 17 00:00:00 2001 From: steveya Date: Wed, 16 Sep 2020 17:07:15 +0800 Subject: [PATCH 14/23] add a test for unstack series with one level of index. elaborate changes in v1.2.0.rst --- doc/source/whatsnew/v1.2.0.rst | 4 ++-- pandas/tests/frame/test_reshape.py | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d28bbdedec8e8..e39b739f1b7ab 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -322,8 +322,8 @@ Reshaping - Bug in :meth:`DataFrame.pivot_table` with ``aggfunc='count'`` or ``aggfunc='sum'`` returning ``NaN`` for missing categories when pivoted on a ``Categorical``. Now returning ``0`` (:issue:`31422`) - Bug in :func:`union_indexes` where input index names are not preserved in some cases. Affects :func:`concat` and :class:`DataFrame` constructor (:issue:`13475`) - Bug in func :meth:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`) -- Bug in :meth:`DataFrame.stack` for empty DataFrame (:issue:`36113`) - +- Bug in :meth:`DataFrame.stack` where an empty DataFrame.stack would raise an error (:issue:`36113`). Now returning an empty Series with empty MultiIndex. +- Bug in :meth:`Series.unstack`. Now a Series with single level of Index trying to unstack would raise a ValueError. 
(:issue:`36113`) Sparse ^^^^^^ diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index e86731e5aa599..9f6a00fbd45d7 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -1285,11 +1285,18 @@ def test_stack_empty_frame(dropna): @pytest.mark.parametrize("fill_value", [None, 0]) def test_stack_unstack_empty_frame(dropna, fill_value): # GH 36113 - expected = DataFrame() result = DataFrame().stack(dropna=dropna).unstack(fill_value=fill_value) + expected = DataFrame() tm.assert_frame_equal(result, expected) +def test_unstack_single_index_series(): + # GH 36113 + msg = "index must be a MultiIndex to unstack" + with pytest.raises(ValueError, match=msg): + Series().unstack() + + def test_unstacking_multi_index_df(): # see gh-30740 df = DataFrame( From 6c7110124b1f2cd00bc08dd20ccdb314e5a280c2 Mon Sep 17 00:00:00 2001 From: steveya Date: Tue, 22 Sep 2020 20:30:10 +0800 Subject: [PATCH 15/23] adding type information to exception message. --- pandas/core/reshape/reshape.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 80889cccf71d7..1449db10ecd42 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -421,7 +421,9 @@ def unstack(obj, level, fill_value=None): # GH 36113 # Give nicer error messages when unstack a Series whose # Index is not a MultiIndex. 
- raise ValueError("index must be a MultiIndex to unstack") + raise ValueError( + "index must be a MultiIndex to unstack, " f"{type(obj.index)} was passed" + ) else: if is_extension_array_dtype(obj.dtype): return _unstack_extension_series(obj, level, fill_value) From f2f29bcd8cfc9eb22e7be4ab0dbaf9721424ddfb Mon Sep 17 00:00:00 2001 From: steveya Date: Sat, 24 Oct 2020 18:27:19 +0800 Subject: [PATCH 16/23] fix black format problem --- pandas/core/reshape/reshape.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 260e2f8d7d156..2995eedc66cce 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -422,7 +422,7 @@ def unstack(obj, level, fill_value=None): # Give nicer error messages when unstack a Series whose # Index is not a MultiIndex. raise ValueError( - "index must be a MultiIndex to unstack, " f"{type(obj.index)} was passed" + f"index must be a MultiIndex to unstack, {type(obj.index)} was passed" ) else: if is_extension_array_dtype(obj.dtype): From 148b77d6f44a67d49ec63052e36caf7eb38d2821 Mon Sep 17 00:00:00 2001 From: steveya Date: Thu, 5 Nov 2020 14:24:14 +0800 Subject: [PATCH 17/23] fix unittest assert error message --- pandas/tests/frame/test_stack_unstack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index d9610a4233717..bc3ea19665ac3 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1194,7 +1194,7 @@ def test_stack_unstack_empty_frame(dropna, fill_value): def test_unstack_single_index_series(): # GH 36113 - msg = "index must be a MultiIndex to unstack" + msg = r"index must be a MultiIndex to unstack.*" with pytest.raises(ValueError, match=msg): Series().unstack() From 99f828009fba37d86fcdbeb80ae8e694672ca2cb Mon Sep 17 00:00:00 2001 From: steveya Date: Thu, 5 Nov 2020 20:34:38 +0800 
Subject: [PATCH 18/23] change dtype of empty series and dataframe in test --- pandas/tests/frame/test_stack_unstack.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index bc3ea19665ac3..0da0f4bb2b5c3 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1179,7 +1179,7 @@ def test_stack_timezone_aware_values(): def test_stack_empty_frame(dropna): # GH 36113 expected = Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64) - result = DataFrame().stack(dropna=dropna) + result = DataFrame(dtype=np.float64).stack(dropna=dropna) tm.assert_series_equal(result, expected) @@ -1187,8 +1187,8 @@ def test_stack_empty_frame(dropna): @pytest.mark.parametrize("fill_value", [None, 0]) def test_stack_unstack_empty_frame(dropna, fill_value): # GH 36113 - result = DataFrame().stack(dropna=dropna).unstack(fill_value=fill_value) - expected = DataFrame() + result = DataFrame(dtype=np.intp).stack(dropna=dropna).unstack(fill_value=fill_value) + expected = DataFrame(dtype=np.intp) tm.assert_frame_equal(result, expected) @@ -1196,7 +1196,7 @@ def test_unstack_single_index_series(): # GH 36113 msg = r"index must be a MultiIndex to unstack.*" with pytest.raises(ValueError, match=msg): - Series().unstack() + Series(dtype=np.intp).unstack() def test_unstacking_multi_index_df(): From c4e244a715b01c60eeb337ecadafd559ea38a675 Mon Sep 17 00:00:00 2001 From: steveya Date: Thu, 5 Nov 2020 22:33:21 +0800 Subject: [PATCH 19/23] formatting --- pandas/tests/frame/test_stack_unstack.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 0da0f4bb2b5c3..22eeb9493154b 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1187,7 +1187,9 @@ def test_stack_empty_frame(dropna): 
@pytest.mark.parametrize("fill_value", [None, 0]) def test_stack_unstack_empty_frame(dropna, fill_value): # GH 36113 - result = DataFrame(dtype=np.intp).stack(dropna=dropna).unstack(fill_value=fill_value) + result = ( + DataFrame(dtype=np.intp).stack(dropna=dropna).unstack(fill_value=fill_value) + ) expected = DataFrame(dtype=np.intp) tm.assert_frame_equal(result, expected) From 668189fa2cd478ceb5f5280ca7236884fc7c2cae Mon Sep 17 00:00:00 2001 From: steveya Date: Fri, 20 Nov 2020 14:19:10 +0800 Subject: [PATCH 20/23] change intp to int64 in testing of stack unstack empty frame --- pandas/tests/frame/test_stack_unstack.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 22eeb9493154b..9544bb44d19df 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1188,9 +1188,9 @@ def test_stack_empty_frame(dropna): def test_stack_unstack_empty_frame(dropna, fill_value): # GH 36113 result = ( - DataFrame(dtype=np.intp).stack(dropna=dropna).unstack(fill_value=fill_value) + DataFrame(dtype=np.int64).stack(dropna=dropna).unstack(fill_value=fill_value) ) - expected = DataFrame(dtype=np.intp) + expected = DataFrame(dtype=np.int64) tm.assert_frame_equal(result, expected) @@ -1198,7 +1198,7 @@ def test_unstack_single_index_series(): # GH 36113 msg = r"index must be a MultiIndex to unstack.*" with pytest.raises(ValueError, match=msg): - Series(dtype=np.intp).unstack() + Series(dtype=np.int64).unstack() def test_unstacking_multi_index_df(): From 4f95523670304ab7a74068e19c6fa65ba161b5ac Mon Sep 17 00:00:00 2001 From: steveya Date: Sun, 22 Nov 2020 19:34:10 +0800 Subject: [PATCH 21/23] ensure indexer is of type int64 --- pandas/core/sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 2a0da8b0fb35c..729f517c789a7 100644 --- a/pandas/core/sorting.py 
+++ b/pandas/core/sorting.py @@ -610,7 +610,7 @@ def compress_group_index(group_index, sort: bool = True): if sort and len(obs_group_ids) > 0: obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids) - return comp_ids, obs_group_ids + return ensure_int64(comp_ids), ensure_int64(obs_group_ids) def _reorder_by_uniques(uniques, labels): From 475f158b898f5fc1759375a155b532429f4738bc Mon Sep 17 00:00:00 2001 From: steveya Date: Thu, 26 Nov 2020 17:31:21 +0800 Subject: [PATCH 22/23] remove xfail --- pandas/tests/reshape/test_pivot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 5a28cd5c418f0..800adc41123ed 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2102,7 +2102,6 @@ def test_pivot_duplicates(self): with pytest.raises(ValueError, match="duplicate entries"): data.pivot("a", "b", "c") - @pytest.mark.xfail(not IS64, reason="GH 36579: fail on 32-bit system") def test_pivot_empty(self): df = DataFrame(columns=["a", "b", "c"]) result = df.pivot("a", "b", "c") From bdf49d3851e75cacb0306e04edf91c2f281c56e9 Mon Sep 17 00:00:00 2001 From: steveya Date: Thu, 26 Nov 2020 18:48:15 +0800 Subject: [PATCH 23/23] remove unused import --- pandas/tests/reshape/test_pivot.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 800adc41123ed..f9b2a02920841 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -4,8 +4,6 @@ import numpy as np import pytest -from pandas.compat import IS64 - import pandas as pd from pandas import ( Categorical,