From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 1/5] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") 
week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From 
6bff8aff41479426957dd00031746ee3e773272f Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 12 Jan 2020 17:46:19 +0100 Subject: [PATCH 2/5] fix issue 19966 --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/reshape/reshape.py | 2 + pandas/tests/frame/test_reshape.py | 74 +++++++++++++++++++++++++++++ pandas/tests/series/test_reshape.py | 59 +++++++++++++++++++++++ 4 files changed, 136 insertions(+) create mode 100644 pandas/tests/series/test_reshape.py diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 5f79accc5c679..6abad0922416f 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1133,6 +1133,7 @@ Reshaping - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) - Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`) - Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`) +- Bug in :func:`unstack` can take tuple names in MultiIndexed data (:issue:`19966`) Sparse ^^^^^^ diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 97f416e32d07b..a96f5d602713c 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -317,6 +317,8 @@ def _unstack_multiple(data, clocs, fill_value=None): index = data.index + if clocs in index.names: + clocs = [clocs] clocs = [index._get_level_number(i) for i in clocs] rlocs = [i for i in range(index.nlevels) if i not in clocs] diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 56a0c8cf4f5bd..014d6522cfe9b 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -336,6 +336,80 @@ def test_unstack_fill_frame_categorical(self): ) tm.assert_frame_equal(result, expected) + def test_unstack_tuplename_in_multiindex(self): + # GH 19966 + idx = pd.MultiIndex.from_product( + [["a", "b", "c"], [1, 2, 3]], 
names=[("A", "a"), ("B", "b")] + ) + df = pd.DataFrame({"d": [1] * 9, "e": [2] * 9}, index=idx) + result = df.unstack(("A", "a")) + + expected = pd.DataFrame( + [[1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2]], + columns=pd.MultiIndex.from_tuples( + [ + ("d", "a"), + ("d", "b"), + ("d", "c"), + ("e", "a"), + ("e", "b"), + ("e", "c"), + ], + names=[None, ("A", "a")], + ), + index=pd.Index([1, 2, 3], name=("B", "b")), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "unstack_idx, expected_values, expected_index, expected_columns", + [ + ( + ("A", "a"), + [[1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2]], + pd.MultiIndex.from_tuples( + [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"] + ), + pd.MultiIndex.from_tuples( + [("d", "a"), ("d", "b"), ("e", "a"), ("e", "b")], + names=[None, ("A", "a")], + ), + ), + ( + (("A", "a"), "B"), + [[1, 1, 1, 1, 2, 2, 2, 2], [1, 1, 1, 1, 2, 2, 2, 2]], + pd.Index([3, 4], name="C"), + pd.MultiIndex.from_tuples( + [ + ("d", "a", 1), + ("d", "a", 2), + ("d", "b", 1), + ("d", "b", 2), + ("e", "a", 1), + ("e", "a", 2), + ("e", "b", 1), + ("e", "b", 2), + ], + names=[None, ("A", "a"), "B"], + ), + ), + ], + ) + def test_unstack_mixed_type_name_in_multiindex( + self, unstack_idx, expected_values, expected_index, expected_columns + ): + # GH 19966 + idx = pd.MultiIndex.from_product( + [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"] + ) + df = pd.DataFrame({"d": [1] * 8, "e": [2] * 8}, index=idx) + result = df.unstack(unstack_idx) + + expected = pd.DataFrame( + expected_values, columns=expected_columns, index=expected_index, + ) + tm.assert_frame_equal(result, expected) + def test_unstack_preserve_dtypes(self): # Checks fix for #11847 df = pd.DataFrame( diff --git a/pandas/tests/series/test_reshape.py b/pandas/tests/series/test_reshape.py new file mode 100644 index 0000000000000..bb7d1fcdd4070 --- /dev/null +++ b/pandas/tests/series/test_reshape.py @@ -0,0 +1,59 @@ +import pytest + 
+import pandas as pd +import pandas._testing as tm + + +def test_unstack_tuplename_in_multiindex(): + # GH 19966 + idx = pd.MultiIndex.from_product( + [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")] + ) + ser = pd.Series(1, index=idx) + result = ser.unstack(("A", "a")) + + expected = pd.DataFrame( + [[1, 1, 1], [1, 1, 1], [1, 1, 1]], + columns=pd.MultiIndex.from_tuples( + [("a",), ("b",), ("c",)], names=[("A", "a")], + ), + index=pd.Index([1, 2, 3], name=("B", "b")), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "unstack_idx, expected_values, expected_index, expected_columns", + [ + ( + ("A", "a"), + [[1, 1], [1, 1], [1, 1], [1, 1]], + pd.MultiIndex.from_tuples( + [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"] + ), + pd.MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]), + ), + ( + (("A", "a"), "B"), + [[1, 1, 1, 1], [1, 1, 1, 1]], + pd.Index([3, 4], name="C"), + pd.MultiIndex.from_tuples( + [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"] + ), + ), + ], +) +def test_unstack_mixed_type_name_in_multiindex( + unstack_idx, expected_values, expected_index, expected_columns +): + # GH 19966 + idx = pd.MultiIndex.from_product( + [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"] + ) + ser = pd.Series(1, index=idx) + result = ser.unstack(unstack_idx) + + expected = pd.DataFrame( + expected_values, columns=expected_columns, index=expected_index, + ) + tm.assert_frame_equal(result, expected) From e87370792ef1942149f1ca0cce3bfd67c615d338 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 12 Jan 2020 20:36:07 +0100 Subject: [PATCH 3/5] move to analytics --- pandas/tests/series/test_analytics.py | 53 ++++++++++++++++++++++++ pandas/tests/series/test_reshape.py | 59 --------------------------- 2 files changed, 53 insertions(+), 59 deletions(-) delete mode 100644 pandas/tests/series/test_reshape.py diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 
c29bd3ea0cb7d..5705f08e98c26 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -219,6 +219,59 @@ def test_unstack(self): right.index = pd.MultiIndex.from_tuples(tpls) tm.assert_frame_equal(ts.unstack(level=0), right) + def test_unstack_tuplename_in_multiindex(self): + # GH 19966 + idx = pd.MultiIndex.from_product( + [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")] + ) + ser = pd.Series(1, index=idx) + result = ser.unstack(("A", "a")) + + expected = pd.DataFrame( + [[1, 1, 1], [1, 1, 1], [1, 1, 1]], + columns=pd.MultiIndex.from_tuples( + [("a",), ("b",), ("c",)], names=[("A", "a")], + ), + index=pd.Index([1, 2, 3], name=("B", "b")), + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "unstack_idx, expected_values, expected_index, expected_columns", + [ + ( + ("A", "a"), + [[1, 1], [1, 1], [1, 1], [1, 1]], + pd.MultiIndex.from_tuples( + [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"] + ), + pd.MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]), + ), + ( + (("A", "a"), "B"), + [[1, 1, 1, 1], [1, 1, 1, 1]], + pd.Index([3, 4], name="C"), + pd.MultiIndex.from_tuples( + [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"] + ), + ), + ], + ) + def test_unstack_mixed_type_name_in_multiindex( + self, unstack_idx, expected_values, expected_index, expected_columns + ): + # GH 19966 + idx = pd.MultiIndex.from_product( + [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"] + ) + ser = pd.Series(1, index=idx) + result = ser.unstack(unstack_idx) + + expected = pd.DataFrame( + expected_values, columns=expected_columns, index=expected_index, + ) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("func", [np.any, np.all]) @pytest.mark.parametrize("kwargs", [dict(keepdims=True), dict(out=object())]) @td.skip_if_np_lt("1.15") diff --git a/pandas/tests/series/test_reshape.py b/pandas/tests/series/test_reshape.py deleted file mode 100644 index 
bb7d1fcdd4070..0000000000000 --- a/pandas/tests/series/test_reshape.py +++ /dev/null @@ -1,59 +0,0 @@ -import pytest - -import pandas as pd -import pandas._testing as tm - - -def test_unstack_tuplename_in_multiindex(): - # GH 19966 - idx = pd.MultiIndex.from_product( - [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")] - ) - ser = pd.Series(1, index=idx) - result = ser.unstack(("A", "a")) - - expected = pd.DataFrame( - [[1, 1, 1], [1, 1, 1], [1, 1, 1]], - columns=pd.MultiIndex.from_tuples( - [("a",), ("b",), ("c",)], names=[("A", "a")], - ), - index=pd.Index([1, 2, 3], name=("B", "b")), - ) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "unstack_idx, expected_values, expected_index, expected_columns", - [ - ( - ("A", "a"), - [[1, 1], [1, 1], [1, 1], [1, 1]], - pd.MultiIndex.from_tuples( - [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"] - ), - pd.MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]), - ), - ( - (("A", "a"), "B"), - [[1, 1, 1, 1], [1, 1, 1, 1]], - pd.Index([3, 4], name="C"), - pd.MultiIndex.from_tuples( - [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"] - ), - ), - ], -) -def test_unstack_mixed_type_name_in_multiindex( - unstack_idx, expected_values, expected_index, expected_columns -): - # GH 19966 - idx = pd.MultiIndex.from_product( - [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"] - ) - ser = pd.Series(1, index=idx) - result = ser.unstack(unstack_idx) - - expected = pd.DataFrame( - expected_values, columns=expected_columns, index=expected_index, - ) - tm.assert_frame_equal(result, expected) From 0c39036fe328990ab9c0a124aed58923bb5ba376 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 12 Jan 2020 20:37:56 +0100 Subject: [PATCH 4/5] update whatsnew and fix pep8 --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/tests/series/test_analytics.py | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst 
b/doc/source/whatsnew/v1.0.0.rst index 6abad0922416f..a6d13cb693ad3 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1133,7 +1133,7 @@ Reshaping - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) - Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`) - Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`) -- Bug in :func:`unstack` can take tuple names in MultiIndexed data (:issue:`19966`) +- Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` can take tuple names in MultiIndexed data (:issue:`19966`) Sparse ^^^^^^ diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 5705f08e98c26..62b88424831ce 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -240,25 +240,25 @@ def test_unstack_tuplename_in_multiindex(self): "unstack_idx, expected_values, expected_index, expected_columns", [ ( - ("A", "a"), - [[1, 1], [1, 1], [1, 1], [1, 1]], - pd.MultiIndex.from_tuples( - [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"] - ), - pd.MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]), + ("A", "a"), + [[1, 1], [1, 1], [1, 1], [1, 1]], + pd.MultiIndex.from_tuples( + [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"] + ), + pd.MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]), ), ( - (("A", "a"), "B"), - [[1, 1, 1, 1], [1, 1, 1, 1]], - pd.Index([3, 4], name="C"), - pd.MultiIndex.from_tuples( - [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"] - ), + (("A", "a"), "B"), + [[1, 1, 1, 1], [1, 1, 1, 1]], + pd.Index([3, 4], name="C"), + pd.MultiIndex.from_tuples( + [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"] + ), ), ], ) def test_unstack_mixed_type_name_in_multiindex( - self, unstack_idx, expected_values, expected_index, expected_columns 
+ self, unstack_idx, expected_values, expected_index, expected_columns ): # GH 19966 idx = pd.MultiIndex.from_product( From bd38e5fc8a8be18eb4dd5f306657d89decb0715f Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Wed, 15 Jan 2020 07:45:32 +0100 Subject: [PATCH 5/5] code change based on reviews --- doc/source/whatsnew/v1.0.0.rst | 1 - doc/source/whatsnew/v1.1.0.rst | 2 + pandas/core/reshape/reshape.py | 2 + pandas/tests/series/test_analytics.py | 114 +----------------------- pandas/tests/series/test_reshaping.py | 120 ++++++++++++++++++++++++++ 5 files changed, 125 insertions(+), 114 deletions(-) create mode 100644 pandas/tests/series/test_reshaping.py diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 598d8f47954fe..c423933d4c438 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -1133,7 +1133,6 @@ Reshaping - Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`) - Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`) - Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`) -- Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` can take tuple names in MultiIndexed data (:issue:`19966`) Sparse ^^^^^^ diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 721bcb0758992..acc0d0998b36b 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -141,6 +141,8 @@ Reshaping - - Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`) +- Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` where tuple names in MultiIndexed data were not recognised as a single level name (:issue:`19966`) + Sparse ^^^^^^ diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index a96f5d602713c..fab9f41cb6c4f 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@
-317,6 +317,8 @@ def _unstack_multiple(data, clocs, fill_value=None): index = data.index + # GH 19966 If clocs is itself a (tuple) level name of the MultiIndex, treat it + # as one level rather than iterating over its elements if clocs in index.names: clocs = [clocs] clocs = [index._get_level_number(i) for i in clocs] diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 62b88424831ce..e6e91b5d4f5f4 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -6,7 +6,7 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, MultiIndex, Series +from pandas import DataFrame, Series import pandas._testing as tm @@ -160,118 +160,6 @@ def test_is_monotonic(self): assert s.is_monotonic is False assert s.is_monotonic_decreasing is True - def test_unstack(self): - - index = MultiIndex( - levels=[["bar", "foo"], ["one", "three", "two"]], - codes=[[1, 1, 0, 0], [0, 1, 0, 2]], - ) - - s = Series(np.arange(4.0), index=index) - unstacked = s.unstack() - - expected = DataFrame( - [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]], - index=["bar", "foo"], - columns=["one", "three", "two"], - ) - - tm.assert_frame_equal(unstacked, expected) - - unstacked = s.unstack(level=0) - tm.assert_frame_equal(unstacked, expected.T) - - index = MultiIndex( - levels=[["bar"], ["one", "two", "three"], [0, 1]], - codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], - ) - s = Series(np.random.randn(6), index=index) - exp_index = MultiIndex( - levels=[["one", "two", "three"], [0, 1]], - codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], - ) - expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0) - unstacked = s.unstack(0).sort_index() - tm.assert_frame_equal(unstacked, expected) - - # GH5873 - idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]]) - ts = pd.Series([1, 2], index=idx) - left = ts.unstack() - right = DataFrame( - [[np.nan, 1], [2, np.nan]], index=[101, 102],
columns=[np.nan, 3.5] - ) - tm.assert_frame_equal(left, right) - - idx = pd.MultiIndex.from_arrays( - [ - ["cat", "cat", "cat", "dog", "dog"], - ["a", "a", "b", "a", "b"], - [1, 2, 1, 1, np.nan], - ] - ) - ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx) - right = DataFrame( - [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]], - columns=["cat", "dog"], - ) - tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)] - right.index = pd.MultiIndex.from_tuples(tpls) - tm.assert_frame_equal(ts.unstack(level=0), right) - - def test_unstack_tuplename_in_multiindex(self): - # GH 19966 - idx = pd.MultiIndex.from_product( - [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")] - ) - ser = pd.Series(1, index=idx) - result = ser.unstack(("A", "a")) - - expected = pd.DataFrame( - [[1, 1, 1], [1, 1, 1], [1, 1, 1]], - columns=pd.MultiIndex.from_tuples( - [("a",), ("b",), ("c",)], names=[("A", "a")], - ), - index=pd.Index([1, 2, 3], name=("B", "b")), - ) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "unstack_idx, expected_values, expected_index, expected_columns", - [ - ( - ("A", "a"), - [[1, 1], [1, 1], [1, 1], [1, 1]], - pd.MultiIndex.from_tuples( - [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"] - ), - pd.MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]), - ), - ( - (("A", "a"), "B"), - [[1, 1, 1, 1], [1, 1, 1, 1]], - pd.Index([3, 4], name="C"), - pd.MultiIndex.from_tuples( - [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"] - ), - ), - ], - ) - def test_unstack_mixed_type_name_in_multiindex( - self, unstack_idx, expected_values, expected_index, expected_columns - ): - # GH 19966 - idx = pd.MultiIndex.from_product( - [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"] - ) - ser = pd.Series(1, index=idx) - result = ser.unstack(unstack_idx) - - expected = pd.DataFrame( - expected_values, columns=expected_columns, index=expected_index, - ) - tm.assert_frame_equal(result, expected) - 
@pytest.mark.parametrize("func", [np.any, np.all]) @pytest.mark.parametrize("kwargs", [dict(keepdims=True), dict(out=object())]) @td.skip_if_np_lt("1.15") diff --git a/pandas/tests/series/test_reshaping.py b/pandas/tests/series/test_reshaping.py new file mode 100644 index 0000000000000..7645fb8759a54 --- /dev/null +++ b/pandas/tests/series/test_reshaping.py @@ -0,0 +1,120 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import DataFrame, MultiIndex, Series +import pandas._testing as tm + + +def test_unstack(): + index = MultiIndex( + levels=[["bar", "foo"], ["one", "three", "two"]], + codes=[[1, 1, 0, 0], [0, 1, 0, 2]], + ) + + s = Series(np.arange(4.0), index=index) + unstacked = s.unstack() + + expected = DataFrame( + [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]], + index=["bar", "foo"], + columns=["one", "three", "two"], + ) + + tm.assert_frame_equal(unstacked, expected) + + unstacked = s.unstack(level=0) + tm.assert_frame_equal(unstacked, expected.T) + + index = MultiIndex( + levels=[["bar"], ["one", "two", "three"], [0, 1]], + codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + s = Series(np.random.randn(6), index=index) + exp_index = MultiIndex( + levels=[["one", "two", "three"], [0, 1]], + codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], + ) + expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0) + unstacked = s.unstack(0).sort_index() + tm.assert_frame_equal(unstacked, expected) + + # GH5873 + idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]]) + ts = pd.Series([1, 2], index=idx) + left = ts.unstack() + right = DataFrame( + [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5] + ) + tm.assert_frame_equal(left, right) + + idx = pd.MultiIndex.from_arrays( + [ + ["cat", "cat", "cat", "dog", "dog"], + ["a", "a", "b", "a", "b"], + [1, 2, 1, 1, np.nan], + ] + ) + ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx) + right = DataFrame( + [[1.0, 1.3], [1.1, np.nan], [np.nan, 
1.4], [1.2, np.nan]], + columns=["cat", "dog"], + ) + tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)] + right.index = pd.MultiIndex.from_tuples(tpls) + tm.assert_frame_equal(ts.unstack(level=0), right) + + +def test_unstack_tuplename_in_multiindex(): + # GH 19966 + idx = pd.MultiIndex.from_product( + [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")] + ) + ser = pd.Series(1, index=idx) + result = ser.unstack(("A", "a")) + + expected = pd.DataFrame( + [[1, 1, 1], [1, 1, 1], [1, 1, 1]], + columns=pd.MultiIndex.from_tuples( + [("a",), ("b",), ("c",)], names=[("A", "a")], + ), + index=pd.Index([1, 2, 3], name=("B", "b")), + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "unstack_idx, expected_values, expected_index, expected_columns", + [ + ( + ("A", "a"), + [[1, 1], [1, 1], [1, 1], [1, 1]], + pd.MultiIndex.from_tuples( + [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"] + ), + pd.MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]), + ), + ( + (("A", "a"), "B"), + [[1, 1, 1, 1], [1, 1, 1, 1]], + pd.Index([3, 4], name="C"), + pd.MultiIndex.from_tuples( + [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"] + ), + ), + ], +) +def test_unstack_mixed_type_name_in_multiindex( + unstack_idx, expected_values, expected_index, expected_columns +): + # GH 19966 + idx = pd.MultiIndex.from_product( + [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"] + ) + ser = pd.Series(1, index=idx) + result = ser.unstack(unstack_idx) + + expected = pd.DataFrame( + expected_values, columns=expected_columns, index=expected_index, + ) + tm.assert_frame_equal(result, expected)