From 732e512db09405aba7a66efa00ec3c0a54f3b9ce Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 13 Sep 2020 11:08:54 +0200 Subject: [PATCH 1/4] Add test for ffill and duplicate column names --- pandas/tests/groupby/test_groupby.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 69397228dd941..1764a0c0533b1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2146,3 +2146,16 @@ def test_groupby_column_index_name_lost_fill_funcs(func): result = getattr(df_grouped, func)().columns expected = pd.Index(["a", "b"], name="idx") tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("func", ["ffill", "bfill"]) +def test_groupby_fill_duplicate_column_names(func): + # GH: 25610 ValueError with duplicate column names + df1 = pd.DataFrame({"field1": [1, 3, 4], "field2": [1, 3, 4],}) + df2 = pd.DataFrame({"field1": [1, np.nan, 4],}) + df_grouped = pd.concat([df1, df2], axis=1).groupby(by=["field2"]) + expected = pd.DataFrame( + [[1, 1.0], [3, np.nan], [4, 4.0]], columns=["field1", "field1"] + ) + result = getattr(df_grouped, func)() + tm.assert_frame_equal(result, expected) From 3d8e02f2923a7d312213a0941196295a10c22ff0 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 13 Sep 2020 11:11:45 +0200 Subject: [PATCH 2/4] Fix Pep8 issues --- pandas/tests/groupby/test_groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 1764a0c0533b1..bce23d4d95a06 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2151,8 +2151,8 @@ def test_groupby_column_index_name_lost_fill_funcs(func): @pytest.mark.parametrize("func", ["ffill", "bfill"]) def test_groupby_fill_duplicate_column_names(func): # GH: 25610 ValueError with duplicate column names - df1 = pd.DataFrame({"field1": [1, 3, 4], "field2": [1, 3, 4],}) - df2 = pd.DataFrame({"field1": [1, np.nan, 4],}) + df1 = pd.DataFrame({"field1": [1, 3, 4], "field2": [1, 3, 4]}) + df2 = pd.DataFrame({"field1": [1, np.nan, 4]}) df_grouped = pd.concat([df1, df2], axis=1).groupby(by=["field2"]) expected = pd.DataFrame( [[1, 1.0], [3, np.nan], [4, 4.0]], columns=["field1", "field1"] From 78096acf7b0bf6356aa520d60b2d7110f92b7a81 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 13 Sep 2020 15:12:41 +0200 Subject: [PATCH 3/4] Create separate file for fill tests --- pandas/tests/groupby/test_function.py | 45 -------------- pandas/tests/groupby/test_groupby.py | 33 ----------- pandas/tests/groupby/test_missing.py | 84 +++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 78 deletions(-) create mode 100644 pandas/tests/groupby/test_missing.py diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 42945be923fa0..ab736b55b5743 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -495,51 +495,6 @@ def test_idxmin_idxmax_returns_int_types(func, values): tm.assert_frame_equal(result, expected) -def test_fill_consistency(): - - # GH9221 - # pass thru keyword arguments to the generated wrapper - # are set if the passed kw is None (only) - df = DataFrame( - index=pd.MultiIndex.from_product( - [["value1", "value2"], date_range("2014-01-01", "2014-01-06")] - ), - columns=Index(["1", "2"], name="id"), - ) - df["1"] = [ - np.nan, - 1, - np.nan, - np.nan, - 11, - np.nan, - np.nan, - 2, - np.nan, - np.nan, - 22, - np.nan, - ] - df["2"] = [ - np.nan, - 3, - np.nan, - np.nan, - 33, - np.nan, - np.nan, - 4, - np.nan, - np.nan, - 44, - np.nan, - ] - - expected = df.groupby(level=0, axis=0).fillna(method="ffill") - result = df.T.groupby(level=0, axis=1).fillna(method="ffill").T - tm.assert_frame_equal(result, expected) - - def test_groupby_cumprod(): # GH 4095 df = pd.DataFrame({"key": ["b"] * 10, "value": 2}) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index bce23d4d95a06..313b0ea2434f9 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1961,13 +1961,6 @@ def test_shift_bfill_ffill_tz(tz_naive_fixture, op, expected): tm.assert_frame_equal(result, expected) -def test_ffill_missing_arguments(): - # GH 14955 - df = pd.DataFrame({"a": [1, 2], "b": [1, 1]}) - with pytest.raises(ValueError, match="Must specify a fill"): - df.groupby("b").fillna() - - def test_groupby_only_none_group(): # see GH21624 # this was crashing with "ValueError: Length of passed values is 1, index implies 0" @@ -2133,29 +2126,3 @@ def test_groupby_column_index_name_lost(func): df_grouped = df.groupby([1]) result = getattr(df_grouped, func)().columns tm.assert_index_equal(result, expected) - - -@pytest.mark.parametrize("func", ["ffill", "bfill"]) -def test_groupby_column_index_name_lost_fill_funcs(func): - # GH: 29764 groupby loses index sometimes - df = pd.DataFrame( - [[1, 1.0, -1.0], [1, np.nan, np.nan], [1, 2.0, -2.0]], - columns=pd.Index(["type", "a", "b"], name="idx"), - ) - df_grouped = df.groupby(["type"])[["a", "b"]] - result = getattr(df_grouped, func)().columns - expected = pd.Index(["a", "b"], name="idx") - tm.assert_index_equal(result, expected) - - -@pytest.mark.parametrize("func", ["ffill", "bfill"]) -def test_groupby_fill_duplicate_column_names(func): - # GH: 25610 ValueError with duplicate column names - df1 = pd.DataFrame({"field1": [1, 3, 4], "field2": [1, 3, 4]}) - df2 = pd.DataFrame({"field1": [1, np.nan, 4]}) - df_grouped = pd.concat([df1, df2], axis=1).groupby(by=["field2"]) - expected = pd.DataFrame( - [[1, 1.0], [3, np.nan], [4, 4.0]], columns=["field1", "field1"] - ) - result = getattr(df_grouped, func)() - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_missing.py b/pandas/tests/groupby/test_missing.py new file mode 100644 index 0000000000000..06dd7a2451e42 --- /dev/null +++ b/pandas/tests/groupby/test_missing.py @@ -0,0 +1,84 @@ +import numpy as np + +import pandas as pd +import pandas._testing as tm +from pandas import DataFrame, Index, date_range +import pytest + + +@pytest.mark.parametrize("func", ["ffill", "bfill"]) +def test_groupby_column_index_name_lost_fill_funcs(func): + # GH: 29764 groupby loses index sometimes + df = pd.DataFrame( + [[1, 1.0, -1.0], [1, np.nan, np.nan], [1, 2.0, -2.0]], + columns=pd.Index(["type", "a", "b"], name="idx"), + ) + df_grouped = df.groupby(["type"])[["a", "b"]] + result = getattr(df_grouped, func)().columns + expected = pd.Index(["a", "b"], name="idx") + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize("func", ["ffill", "bfill"]) +def test_groupby_fill_duplicate_column_names(func): + # GH: 25610 ValueError with duplicate column names + df1 = pd.DataFrame({"field1": [1, 3, 4], "field2": [1, 3, 4]}) + df2 = pd.DataFrame({"field1": [1, np.nan, 4]}) + df_grouped = pd.concat([df1, df2], axis=1).groupby(by=["field2"]) + expected = pd.DataFrame( + [[1, 1.0], [3, np.nan], [4, 4.0]], columns=["field1", "field1"] + ) + result = getattr(df_grouped, func)() + tm.assert_frame_equal(result, expected) + + +def test_ffill_missing_arguments(): + # GH 14955 + df = pd.DataFrame({"a": [1, 2], "b": [1, 1]}) + with pytest.raises(ValueError, match="Must specify a fill"): + df.groupby("b").fillna() + + +def test_fill_consistency(): + + # GH9221 + # pass thru keyword arguments to the generated wrapper + # are set if the passed kw is None (only) + df = DataFrame( + index=pd.MultiIndex.from_product( + [["value1", "value2"], date_range("2014-01-01", "2014-01-06")] + ), + columns=Index(["1", "2"], name="id"), + ) + df["1"] = [ + np.nan, + 1, + np.nan, + np.nan, + 11, + np.nan, + np.nan, + 2, + np.nan, + np.nan, + 22, + np.nan, + ] + df["2"] = [ + np.nan, + 3, + np.nan, + np.nan, + 33, + np.nan, + np.nan, + 4, + np.nan, + np.nan, + 44, + np.nan, + ] + + expected = df.groupby(level=0, axis=0).fillna(method="ffill") + result = df.T.groupby(level=0, axis=1).fillna(method="ffill").T + tm.assert_frame_equal(result, expected) From d89e92e8c807946d16b2d74e39e47196ccd2eb13 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 13 Sep 2020 16:26:45 +0200 Subject: [PATCH 4/4] Resort Imports --- pandas/tests/groupby/test_missing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_missing.py b/pandas/tests/groupby/test_missing.py index 06dd7a2451e42..116aed9935694 100644 --- a/pandas/tests/groupby/test_missing.py +++ b/pandas/tests/groupby/test_missing.py @@ -1,9 +1,9 @@ import numpy as np +import pytest import pandas as pd -import pandas._testing as tm from pandas import DataFrame, Index, date_range -import pytest +import pandas._testing as tm @pytest.mark.parametrize("func", ["ffill", "bfill"])