From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 01/14] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From f7b37d628229cf4811260f97d40a280893230795 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 11 Jan 2020 22:00:41 +0100 Subject: [PATCH 02/14] Allow multi values for index and columns in pivot --- pandas/core/reshape/pivot.py | 22 +++- pandas/tests/reshape/test_pivot.py | 193 +++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index b443ba142369c..f80bf83930315 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -427,16 +427,30 @@ def _convert_by(by): @Substitution("\ndata : DataFrame") @Appender(_shared_docs["pivot"], indents=1) def pivot(data: "DataFrame", index=None, columns=None, values=None) -> "DataFrame": + + columns = columns if is_list_like(columns) else [columns] if values is None: - cols = [columns] if index is None else [index, columns] + if index is None: + cols = [] + elif is_list_like(index): + cols = [column for column in index] + else: + cols = [index] + cols.extend(columns) + append = index is None indexed = data.set_index(cols, append=append) else: if index is None: - index = data.index + index = [Series(data.index, name=data.index.name)] + elif is_list_like(index): + index = [data[column] for column in index] else: - index = data[index] - index = MultiIndex.from_arrays([index, data[columns]]) + index = [data[index]] + + data_columns = [data[col] for col in columns] + index.extend(data_columns) + index = MultiIndex.from_arrays(index) if is_list_like(values) and not isinstance(values, tuple): # Exclude tuple because it is seen as a single column name diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 743fc50c87e96..f707db9b67a12 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -990,6 +990,199 @@ def test_pivot_no_level_overlap(self): expected = grouped.unstack("b").unstack("c").dropna(axis=1, how="all") tm.assert_frame_equal(table, expected) + @pytest.mark.parametrize( + "input_index, input_columns, input_values, expected_values, expected_columns, expected_index", + [ + ( + ["lev4"], + "lev3", + "values", + [ + [0.0, np.nan], + [np.nan, 1.0], + [2.0, np.nan], + [np.nan, 3.0], + [4.0, np.nan], + [np.nan, 5.0], + [6.0, np.nan], + [np.nan, 7.0], + ], + Index([1, 2], name="lev3"), + Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"), + ), + ( + ["lev4"], + "lev3", + None, + [ + [1.0, np.nan, 1.0, np.nan, 0.0, np.nan], + [np.nan, 1.0, np.nan, 1.0, np.nan, 1.0], + [1.0, np.nan, 2.0, np.nan, 2.0, np.nan], + [np.nan, 1.0, np.nan, 2.0, np.nan, 3.0], + [2.0, np.nan, 1.0, np.nan, 4.0, np.nan], + [np.nan, 2.0, np.nan, 1.0, np.nan, 5.0], + [2.0, np.nan, 2.0, np.nan, 6.0, np.nan], + [np.nan, 2.0, np.nan, 2.0, np.nan, 7.0], + ], + MultiIndex.from_tuples( + [ + ("lev1", 1), + ("lev1", 2), + ("lev2", 1), + ("lev2", 2), + ("values", 1), + ("values", 2), + ], + names=[None, "lev3"], + ), + Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"), + ), + ( + ["lev1", "lev2"], + "lev3", + "values", + [[0, 1], [2, 3], [4, 5], [6, 7]], + Index([1, 2], name="lev3"), + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] + ), + ), + ( + ["lev1", "lev2"], + "lev3", + None, + [[1, 2, 0, 1], [3, 4, 2, 3], [5, 6, 4, 5], [7, 8, 6, 7]], + MultiIndex.from_tuples( + [("lev4", 1), ("lev4", 2), ("values", 1), ("values", 2)], + names=[None, "lev3"], + ), + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] + ), + ), + ], + ) + def test_pivot_list_like_index( + self, + input_index, + input_columns, + input_values, + expected_values, + expected_columns, + expected_index, + ): + # GH 21425, test when index is given a list + df = pd.DataFrame( + { + "lev1": [1, 1, 1, 1, 2, 2, 2, 2], + "lev2": [1, 1, 2, 2, 1, 1, 2, 2], + "lev3": [1, 2, 1, 2, 1, 2, 1, 2], + "lev4": [1, 2, 3, 4, 5, 6, 7, 8], + "values": [0, 1, 2, 3, 4, 5, 6, 7], + } + ) + + result = df.pivot(index=input_index, columns=input_columns, values=input_values) + expected = pd.DataFrame( + expected_values, columns=expected_columns, index=expected_index + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "input_index, input_columns, input_values, expected_values, expected_columns, expected_index", + [ + ( + "lev4", + ["lev3"], + "values", + [ + [0.0, np.nan], + [np.nan, 1.0], + [2.0, np.nan], + [np.nan, 3.0], + [4.0, np.nan], + [np.nan, 5.0], + [6.0, np.nan], + [np.nan, 7.0], + ], + Index([1, 2], name="lev3"), + Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"), + ), + ( + ["lev1", "lev2"], + ["lev3"], + "values", + [[0, 1], [2, 3], [4, 5], [6, 7]], + Index([1, 2], name="lev3"), + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] + ), + ), + ( + ["lev1"], + ["lev2", "lev3"], + "values", + [[0, 1, 2, 3], [4, 5, 6, 7]], + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev2", "lev3"] + ), + Index([1, 2], name="lev1"), + ), + ( + ["lev1", "lev2"], + ["lev3", "lev4"], + "values", + [ + [0.0, 1.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, 2.0, 3.0, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, 4.0, 5.0, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 6.0, 7.0], + ], + MultiIndex.from_tuples( + [ + ("values", 1, 1), + ("values", 2, 2), + ("values", 1, 3), + ("values", 2, 4), + ("values", 1, 5), + ("values", 2, 6), + ("values", 1, 7), + ("values", 2, 8), + ], + names=[None, "lev3", "lev4"], + ), + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] + ), + ), + ], + ) + def test_pivot_list_like_columns( + self, + input_index, + input_columns, + input_values, + expected_values, + expected_columns, + expected_index, + ): + # GH 21425, test when columns is given a list + df = pd.DataFrame( + { + "lev1": [1, 1, 1, 1, 2, 2, 2, 2], + "lev2": [1, 1, 2, 2, 1, 1, 2, 2], + "lev3": [1, 2, 1, 2, 1, 2, 1, 2], + "lev4": [1, 2, 3, 4, 5, 6, 7, 8], + "values": [0, 1, 2, 3, 4, 5, 6, 7], + } + ) + + result = df.pivot(index=input_index, columns=input_columns, values=input_values) + expected = pd.DataFrame( + expected_values, columns=expected_columns, index=expected_index + ) + tm.assert_frame_equal(result, expected) + def test_pivot_columns_lexsorted(self): n = 10000 From 180194d0e8f708725e74cfb797bb88c3b6df3a3e Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 11 Jan 2020 22:02:50 +0100 Subject: [PATCH 03/14] better naming --- pandas/core/reshape/pivot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index f80bf83930315..0e4b8bbe7e548 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -433,7 +433,7 @@ def pivot(data: "DataFrame", index=None, columns=None, values=None) -> "DataFram if index is None: cols = [] elif is_list_like(index): - cols = [column for column in index] + cols = [idx for idx in index] else: cols = [index] cols.extend(columns) @@ -444,7 +444,7 @@ def pivot(data: "DataFrame", index=None, columns=None, values=None) -> "DataFram if index is None: index = [Series(data.index, name=data.index.name)] elif is_list_like(index): - index = [data[column] for column in index] + index = [data[idx] for idx in index] else: index = [data[index]] From 98e97303f7e43a090eeb7763e526fe2a93eaab97 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 11 Jan 2020 22:05:31 +0100 Subject: [PATCH 04/14] fix linting --- pandas/tests/reshape/test_pivot.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f707db9b67a12..6c7ba0bce0ae1 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -991,7 +991,8 @@ def test_pivot_no_level_overlap(self): tm.assert_frame_equal(table, expected) @pytest.mark.parametrize( - "input_index, input_columns, input_values, expected_values, expected_columns, expected_index", + "input_index, input_columns, input_values, " + "expected_values, expected_columns, expected_index", [ ( ["lev4"], @@ -1089,7 +1090,8 @@ def test_pivot_list_like_index( tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( - "input_index, input_columns, input_values, expected_values, expected_columns, expected_index", + "input_index, input_columns, input_values, " + "expected_values, expected_columns, expected_index", [ ( "lev4", From 8739957880259933d78faa1ed87d240bee04c9e7 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 11 Jan 2020 22:08:24 +0100 Subject: [PATCH 05/14] Add whatsnew note --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 5f79accc5c679..7ae081c354fbb 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -230,6 +230,7 @@ Other enhancements - Added an experimental :attr:`~DataFrame.attrs` for storing global metadata about a dataset (:issue:`29062`) - :meth:`Timestamp.fromisocalendar` is now compatible with python 3.8 and above (:issue:`28115`) - :meth:`DataFrame.to_pickle` and :func:`read_pickle` now accept URL (:issue:`30163`) +- :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`) Build Changes From 8a2af11d3fa8dcf1681919102ab9592a01b241ae Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 11 Jan 2020 22:17:01 +0100 Subject: [PATCH 06/14] Update docstring --- pandas/core/frame.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5ad133f9e21a4..516c5c689c850 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5808,11 +5808,19 @@ def groupby( Parameters ----------%s - index : str or object, optional + index : str or object or a list of the previous, optional Column to use to make new frame's index. If None, uses existing index. - columns : str or object + + .. versionchanged:: 1.0.0 + Also accept list of index names. + + columns : str or object or a list of the previous Column to use to make new frame's columns. + + .. versionchanged:: 1.0.0 + Also accept list of columns names. + values : str, object or a list of the previous, optional Column(s) to use for populating new frame's values. If not specified, all remaining columns will be used and the result will From 4cdd17a3dc64e860498cc0381f5879b036813c37 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 11 Jan 2020 22:28:12 +0100 Subject: [PATCH 07/14] fix pep8 --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 516c5c689c850..9c75ea6d74776 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5811,13 +5811,13 @@ def groupby( index : str or object or a list of the previous, optional Column to use to make new frame's index. If None, uses existing index. - + .. versionchanged:: 1.0.0 Also accept list of index names. columns : str or object or a list of the previous Column to use to make new frame's columns. - + .. versionchanged:: 1.0.0 Also accept list of columns names. From ced4ec7bcca75c9d1578418e12ba35f24ed1409b Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 11 Jan 2020 22:52:53 +0100 Subject: [PATCH 08/14] fix pep --- pandas/tests/reshape/test_pivot.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 6c7ba0bce0ae1..b69fe30c3f437 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1141,17 +1141,8 @@ def test_pivot_list_like_index( [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 6.0, 7.0], ], MultiIndex.from_tuples( - [ - ("values", 1, 1), - ("values", 2, 2), - ("values", 1, 3), - ("values", 2, 4), - ("values", 1, 5), - ("values", 2, 6), - ("values", 1, 7), - ("values", 2, 8), - ], - names=[None, "lev3", "lev4"], + [(1, 1), (2, 2), (1, 3), (2, 4), (1, 5), (2, 6), (1, 7), (2, 8),], + names=["lev3", "lev4"], ), MultiIndex.from_tuples( [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] From 7f0ea517f26c8adbe083b17a581d462225dc0377 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sun, 12 Jan 2020 09:46:09 +0100 Subject: [PATCH 09/14] fix linting --- pandas/core/reshape/pivot.py | 5 +++-- pandas/tests/reshape/test_pivot.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 0e4b8bbe7e548..f8be23207d017 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -430,10 +430,11 @@ def pivot(data: "DataFrame", index=None, columns=None, values=None) -> "DataFram columns = columns if is_list_like(columns) else [columns] if values is None: + cols: List[str] = [] if index is None: - cols = [] + pass elif is_list_like(index): - cols = [idx for idx in index] + cols = list(index) else: cols = [index] cols.extend(columns) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index b69fe30c3f437..8e60a2e5d1c5e 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1141,7 +1141,7 @@ def test_pivot_list_like_index( [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 6.0, 7.0], ], MultiIndex.from_tuples( - [(1, 1), (2, 2), (1, 3), (2, 4), (1, 5), (2, 6), (1, 7), (2, 8),], + [(1, 1), (2, 2), (1, 3), (2, 4), (1, 5), (2, 6), (1, 7), (2, 8)], names=["lev3", "lev4"], ), MultiIndex.from_tuples( From 3ed3a60dcd967f2caa2f6b2dbc366311fbce8fc1 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 18 Jan 2020 20:49:41 +0100 Subject: [PATCH 10/14] move from 1.0.0 to 1.1 --- doc/source/whatsnew/v1.0.0.rst | 1 - doc/source/whatsnew/v1.1.0.rst | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index e9c7261f366b1..3bd86bb02155f 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -228,7 +228,6 @@ Other enhancements - Added an experimental :attr:`~DataFrame.attrs` for storing global metadata about a dataset (:issue:`29062`) - :meth:`Timestamp.fromisocalendar` is now compatible with python 3.8 and above (:issue:`28115`) - :meth:`DataFrame.to_pickle` and :func:`read_pickle` now accept URL (:issue:`30163`) -- :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`) Build Changes diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 1cd325dad9f07..dee5b2bee2b40 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -144,6 +144,7 @@ Reshaping - Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`) - Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`) - Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`) +- :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`) Sparse From cc1826e1a36d2834e9fa735af2fb2575c4c63fe4 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 18 Jan 2020 21:07:06 +0100 Subject: [PATCH 11/14] update doc and add example --- pandas/core/frame.py | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 295a719a44418..ffa425c9751ff 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5847,14 +5847,14 @@ def groupby( Parameters ----------%s - index : str or object or a list of the previous, optional + index : str or object or a list of str, optional Column to use to make new frame's index. If None, uses existing index. .. versionchanged:: 1.0.0 Also accept list of index names. - columns : str or object or a list of the previous + columns : str or object or a list of str Column to use to make new frame's columns. .. versionchanged:: 1.0.0 @@ -5925,6 +5925,38 @@ def groupby( foo one 1 2 3 x y z two 4 5 6 q w t + + You could also assign a list of column names or a list of index names. + + >>> df = pd.DataFrame({ + ... "lev1": [1, 1, 1, 2, 2, 2], + ... "lev2": [1, 1, 2, 1, 1, 2], + ... "lev3": [1, 2, 1, 2, 1, 2], + ... "lev4": [1, 2, 3, 4, 5, 6], + ... "values": [0, 1, 2, 3, 4, 5]}) + >>> df + lev1 lev2 lev3 lev4 values + 0 1 1 1 1 0 + 1 1 1 2 2 1 + 2 1 2 1 3 2 + 3 2 1 2 4 3 + 4 2 1 1 5 4 + 5 2 2 2 6 5 + + >>> df.pivot(index="lev1", columns=["lev2", "lev3"],values="values") + lev2 1 2 + lev3 1 2 1 2 + lev1 + 1 0.0 1.0 2.0 NaN + 2 4.0 3.0 NaN 5.0 + + >>> df.pivot(index=["lev1", "lev2"], columns=["lev3"],values="values") + lev3 1 2 + lev1 lev2 + 1 1 0.0 1.0 + 2 2.0 NaN + 2 1 4.0 3.0 + 2 NaN 5.0 A ValueError is raised if there are any duplicates. From 311670bc20f2968e23c6d457cf47043dc04ed0ae Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Sat, 18 Jan 2020 21:43:21 +0100 Subject: [PATCH 12/14] fix pep8 --- pandas/core/frame.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ffa425c9751ff..dad71b7e8ba69 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5925,9 +5925,9 @@ def groupby( foo one 1 2 3 x y z two 4 5 6 q w t - + You could also assign a list of column names or a list of index names. - + >>> df = pd.DataFrame({ ... "lev1": [1, 1, 1, 2, 2, 2], ... "lev2": [1, 1, 2, 1, 1, 2], @@ -5942,14 +5942,14 @@ def groupby( 3 2 1 2 4 3 4 2 1 1 5 4 5 2 2 2 6 5 - + >>> df.pivot(index="lev1", columns=["lev2", "lev3"],values="values") lev2 1 2 lev3 1 2 1 2 lev1 1 0.0 1.0 2.0 NaN 2 4.0 3.0 NaN 5.0 - + >>> df.pivot(index=["lev1", "lev2"], columns=["lev3"],values="values") lev3 1 2 lev1 lev2 From 3aa04fa6fc6d70d35c9199eab41be664050ad79f Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 21 Jan 2020 09:22:19 +0100 Subject: [PATCH 13/14] code change on reviews --- pandas/core/frame.py | 4 +- pandas/tests/reshape/test_multilevel.py | 192 ++++++++++++++++++++++++ pandas/tests/reshape/test_pivot.py | 186 ----------------------- 3 files changed, 194 insertions(+), 188 deletions(-) create mode 100644 pandas/tests/reshape/test_multilevel.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a68312d4c897a..2badc3cf523d3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5876,13 +5876,13 @@ def groupby( Column to use to make new frame's index. If None, uses existing index. - .. versionchanged:: 1.0.0 + .. versionchanged:: 1.1.0 Also accept list of index names. columns : str or object or a list of str Column to use to make new frame's columns. - .. versionchanged:: 1.0.0 + .. versionchanged:: 1.1.0 Also accept list of columns names. values : str, object or a list of the previous, optional diff --git a/pandas/tests/reshape/test_multilevel.py b/pandas/tests/reshape/test_multilevel.py new file mode 100644 index 0000000000000..8374e829e6a28 --- /dev/null +++ b/pandas/tests/reshape/test_multilevel.py @@ -0,0 +1,192 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import Index, MultiIndex +import pandas._testing as tm + + +@pytest.mark.parametrize( + "input_index, input_columns, input_values, " + "expected_values, expected_columns, expected_index", + [ + ( + ["lev4"], + "lev3", + "values", + [ + [0.0, np.nan], + [np.nan, 1.0], + [2.0, np.nan], + [np.nan, 3.0], + [4.0, np.nan], + [np.nan, 5.0], + [6.0, np.nan], + [np.nan, 7.0], + ], + Index([1, 2], name="lev3"), + Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"), + ), + ( + ["lev4"], + "lev3", + None, + [ + [1.0, np.nan, 1.0, np.nan, 0.0, np.nan], + [np.nan, 1.0, np.nan, 1.0, np.nan, 1.0], + [1.0, np.nan, 2.0, np.nan, 2.0, np.nan], + [np.nan, 1.0, np.nan, 2.0, np.nan, 3.0], + [2.0, np.nan, 1.0, np.nan, 4.0, np.nan], + [np.nan, 2.0, np.nan, 1.0, np.nan, 5.0], + [2.0, np.nan, 2.0, np.nan, 6.0, np.nan], + [np.nan, 2.0, np.nan, 2.0, np.nan, 7.0], + ], + MultiIndex.from_tuples( + [ + ("lev1", 1), + ("lev1", 2), + ("lev2", 1), + ("lev2", 2), + ("values", 1), + ("values", 2), + ], + names=[None, "lev3"], + ), + Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"), + ), + ( + ["lev1", "lev2"], + "lev3", + "values", + [[0, 1], [2, 3], [4, 5], [6, 7]], + Index([1, 2], name="lev3"), + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] + ), + ), + ( + ["lev1", "lev2"], + "lev3", + None, + [[1, 2, 0, 1], [3, 4, 2, 3], [5, 6, 4, 5], [7, 8, 6, 7]], + MultiIndex.from_tuples( + [("lev4", 1), ("lev4", 2), ("values", 1), ("values", 2)], + names=[None, "lev3"], + ), + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] + ), + ), + ], +) +def test_pivot_list_like_index( + input_index, + input_columns, + input_values, + expected_values, + expected_columns, + expected_index, +): + # GH 21425, test when index is given a list + df = pd.DataFrame( + { + "lev1": [1, 1, 1, 1, 2, 2, 2, 2], + "lev2": [1, 1, 2, 2, 1, 1, 2, 2], + "lev3": [1, 2, 1, 2, 1, 2, 1, 2], + "lev4": [1, 2, 3, 4, 5, 6, 7, 8], + "values": [0, 1, 2, 3, 4, 5, 6, 7], + } + ) + + result = df.pivot(index=input_index, columns=input_columns, values=input_values) + expected = pd.DataFrame( + expected_values, columns=expected_columns, index=expected_index + ) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "input_index, input_columns, input_values, " + "expected_values, expected_columns, expected_index", + [ + ( + "lev4", + ["lev3"], + "values", + [ + [0.0, np.nan], + [np.nan, 1.0], + [2.0, np.nan], + [np.nan, 3.0], + [4.0, np.nan], + [np.nan, 5.0], + [6.0, np.nan], + [np.nan, 7.0], + ], + Index([1, 2], name="lev3"), + Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"), + ), + ( + ["lev1", "lev2"], + ["lev3"], + "values", + [[0, 1], [2, 3], [4, 5], [6, 7]], + Index([1, 2], name="lev3"), + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] + ), + ), + ( + ["lev1"], + ["lev2", "lev3"], + "values", + [[0, 1, 2, 3], [4, 5, 6, 7]], + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev2", "lev3"] + ), + Index([1, 2], name="lev1"), + ), + ( + ["lev1", "lev2"], + ["lev3", "lev4"], + "values", + [ + [0.0, 1.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, 2.0, 3.0, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, 4.0, 5.0, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 6.0, 7.0], + ], + MultiIndex.from_tuples( + [(1, 1), (2, 2), (1, 3), (2, 4), (1, 5), (2, 6), (1, 7), (2, 8)], + names=["lev3", "lev4"], + ), + MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] + ), + ), + ], +) +def test_pivot_list_like_columns( + input_index, + input_columns, + input_values, + expected_values, + expected_columns, + expected_index, +): + # GH 21425, test when columns is given a list + df = pd.DataFrame( + { + "lev1": [1, 1, 1, 1, 2, 2, 2, 2], + "lev2": [1, 1, 2, 2, 1, 1, 2, 2], + "lev3": [1, 2, 1, 2, 1, 2, 1, 2], + "lev4": [1, 2, 3, 4, 5, 6, 7, 8], + "values": [0, 1, 2, 3, 4, 5, 6, 7], + } + ) + + result = df.pivot(index=input_index, columns=input_columns, values=input_values) + expected = pd.DataFrame( + expected_values, columns=expected_columns, index=expected_index + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 6bc94f172e35a..6850c52ca05ea 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1065,192 +1065,6 @@ def test_pivot_no_level_overlap(self): expected = grouped.unstack("b").unstack("c").dropna(axis=1, how="all") tm.assert_frame_equal(table, expected) - @pytest.mark.parametrize( - "input_index, input_columns, input_values, " - "expected_values, expected_columns, expected_index", - [ - ( - ["lev4"], - "lev3", - "values", - [ - [0.0, np.nan], - [np.nan, 1.0], - [2.0, np.nan], - [np.nan, 3.0], - [4.0, np.nan], - [np.nan, 5.0], - [6.0, np.nan], - [np.nan, 7.0], - ], - Index([1, 2], name="lev3"), - Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"), - ), - ( - ["lev4"], - "lev3", - None, - [ - [1.0, np.nan, 1.0, np.nan, 0.0, np.nan], - [np.nan, 1.0, np.nan, 1.0, np.nan, 1.0], - [1.0, np.nan, 2.0, np.nan, 2.0, np.nan], - [np.nan, 1.0, np.nan, 2.0, np.nan, 3.0], - [2.0, np.nan, 1.0, np.nan, 4.0, np.nan], - [np.nan, 2.0, np.nan, 1.0, np.nan, 5.0], - [2.0, np.nan, 2.0, np.nan, 6.0, np.nan], - [np.nan, 2.0, np.nan, 2.0, np.nan, 7.0], - ], - MultiIndex.from_tuples( - [ - ("lev1", 1), - ("lev1", 2), - ("lev2", 1), - ("lev2", 2), - ("values", 1), - ("values", 2), - ], - names=[None, "lev3"], - ), - Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"), - ), - ( - ["lev1", "lev2"], - "lev3", - "values", - [[0, 1], [2, 3], [4, 5], [6, 7]], - Index([1, 2], name="lev3"), - MultiIndex.from_tuples( - [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] - ), - ), - ( - ["lev1", "lev2"], - "lev3", - None, - [[1, 2, 0, 1], [3, 4, 2, 3], [5, 6, 4, 5], [7, 8, 6, 7]], - MultiIndex.from_tuples( - [("lev4", 1), ("lev4", 2), ("values", 1), ("values", 2)], - names=[None, "lev3"], - ), - MultiIndex.from_tuples( - [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] - ), - ), - ], - ) - def test_pivot_list_like_index( - self, - input_index, - input_columns, - input_values, - expected_values, - expected_columns, - expected_index, - ): - # GH 21425, test when index is given a list - df = pd.DataFrame( - { - "lev1": [1, 1, 1, 1, 2, 2, 2, 2], - "lev2": [1, 1, 2, 2, 1, 1, 2, 2], - "lev3": [1, 2, 1, 2, 1, 2, 1, 2], - "lev4": [1, 2, 3, 4, 5, 6, 7, 8], - "values": [0, 1, 2, 3, 4, 5, 6, 7], - } - ) - - result = df.pivot(index=input_index, columns=input_columns, values=input_values) - expected = pd.DataFrame( - expected_values, columns=expected_columns, index=expected_index - ) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( - "input_index, input_columns, input_values, " - "expected_values, expected_columns, expected_index", - [ - ( - "lev4", - ["lev3"], - "values", - [ - [0.0, np.nan], - [np.nan, 1.0], - [2.0, np.nan], - [np.nan, 3.0], - [4.0, np.nan], - [np.nan, 5.0], - [6.0, np.nan], - [np.nan, 7.0], - ], - Index([1, 2], name="lev3"), - Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"), - ), - ( - ["lev1", "lev2"], - ["lev3"], - "values", - [[0, 1], [2, 3], [4, 5], [6, 7]], - Index([1, 2], name="lev3"), - MultiIndex.from_tuples( - [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] - ), - ), - ( - ["lev1"], - ["lev2", "lev3"], - "values", - [[0, 1, 2, 3], [4, 5, 6, 7]], - MultiIndex.from_tuples( - [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev2", "lev3"] - ), - Index([1, 2], name="lev1"), - ), - ( - ["lev1", "lev2"], - ["lev3", "lev4"], - "values", - [ - [0.0, 1.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], - [np.nan, np.nan, 2.0, 3.0, np.nan, np.nan, np.nan, np.nan], - [np.nan, np.nan, np.nan, np.nan, 4.0, 5.0, np.nan, np.nan], - [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 6.0, 7.0], - ], - MultiIndex.from_tuples( - [(1, 1), (2, 2), (1, 3), (2, 4), (1, 5), (2, 6), (1, 7), (2, 8)], - names=["lev3", "lev4"], - ), - MultiIndex.from_tuples( - [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"] - ), - ), - ], - ) - def test_pivot_list_like_columns( - self, - input_index, - input_columns, - input_values, - expected_values, - expected_columns, - expected_index, - ): - # GH 21425, test when columns is given a list - df = pd.DataFrame( - { - "lev1": [1, 1, 1, 1, 2, 2, 2, 2], - "lev2": [1, 1, 2, 2, 1, 1, 2, 2], - "lev3": [1, 2, 1, 2, 1, 2, 1, 2], - "lev4": [1, 2, 3, 4, 5, 6, 7, 8], - "values": [0, 1, 2, 3, 4, 5, 6, 7], - } - ) - - result = df.pivot(index=input_index, columns=input_columns, values=input_values) - expected = pd.DataFrame( - expected_values, columns=expected_columns, index=expected_index - ) - tm.assert_frame_equal(result, expected) - def test_pivot_columns_lexsorted(self): n = 10000 From 9f5f1704a04d40c03c8c2f87a34c84c78bd77dca Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Tue, 21 Jan 2020 09:25:36 +0100 Subject: [PATCH 14/14] rename --- .../reshape/{test_multilevel.py => test_pivot_multilevel.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pandas/tests/reshape/{test_multilevel.py => test_pivot_multilevel.py} (100%) diff --git a/pandas/tests/reshape/test_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py similarity index 100% rename from pandas/tests/reshape/test_multilevel.py rename to pandas/tests/reshape/test_pivot_multilevel.py