From f1befd13ba673273dd3c77962acdd3c50a202d90 Mon Sep 17 00:00:00 2001 From: sidharthann Date: Sun, 22 Sep 2019 18:45:38 +0100 Subject: [PATCH 1/9] BUG: integer indexes incorrectly referenced by name #28247 --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/groupby/generic.py | 2 +- pandas/tests/groupby/test_apply.py | 10 ++++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 173cc6b6b483c..d23117ebf49eb 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -181,6 +181,7 @@ Datetimelike - Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`) - Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) - Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`) +- Bug in :func:`pandas.core.groupby.generic._recast_datetimelike_result` integer indexes for columns referenced by name (:issue:`28247`) - Timedelta diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f8f1455561c03..0910be6a231c9 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1905,7 +1905,7 @@ def _recast_datetimelike_result(result: DataFrame) -> DataFrame: result = result.copy() obj_cols = [ - idx for idx in range(len(result.columns)) if is_object_dtype(result.dtypes[idx]) + idx for idx in range(len(result.columns)) if is_object_dtype(result.dtypes.iloc[idx]) ] # See GH#26285 diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 76588549532b1..560e386c9d930 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -657,3 +657,13 @@ def test_apply_with_mixed_types(): result = g.apply(lambda x: x / x.sum()) tm.assert_frame_equal(result, expected) + + +def test_apply_datetime_issue(): + # GH-28247 + + df = pd.DataFrame({'a': ['foo'], 'b': [datetime.today()]}) + result = df.groupby('a').apply(lambda x: pd.Series(['spam'], index=[42])) + + expected = pd.DataFrame(['spam'], Index(['foo'], dtype='object', name='a'), columns=[42]) + tm.assert_frame_equal(result, expected) From 2c2dacb4e6fe59ff361a89327ff9f39e450a11ac Mon Sep 17 00:00:00 2001 From: sidharthann Date: Sun, 22 Sep 2019 19:13:14 +0100 Subject: [PATCH 2/9] improve formatting #28247 --- pandas/core/groupby/generic.py | 5 ++--- pandas/tests/groupby/test_apply.py | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 0910be6a231c9..7a6f11667dcc4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1904,9 +1904,8 @@ def _recast_datetimelike_result(result: DataFrame) -> DataFrame: """ result = result.copy() - obj_cols = [ - idx for idx in range(len(result.columns)) if is_object_dtype(result.dtypes.iloc[idx]) - ] + obj_cols = [idx for idx in range(len(result.columns)) + if is_object_dtype(result.dtypes.iloc[idx])] # See GH#26285 for n in obj_cols: diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 560e386c9d930..752a62b3226c5 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -665,5 +665,6 @@ def test_apply_datetime_issue(): df = pd.DataFrame({'a': ['foo'], 'b': [datetime.today()]}) result = df.groupby('a').apply(lambda x: pd.Series(['spam'], index=[42])) - expected = pd.DataFrame(['spam'], Index(['foo'], dtype='object', name='a'), columns=[42]) + expected = pd.DataFrame(['spam'], Index(['foo'], dtype='object', name='a'), + columns=[42]) tm.assert_frame_equal(result, expected) From 3880c9dd02631667b3ce5dadc265bf2ac994bd8d Mon Sep 17 00:00:00 2001 From: sidharthann Date: Sun, 22 Sep 2019 21:02:38 +0100 Subject: [PATCH 3/9] Better whats new commit message #28569 --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index d23117ebf49eb..532e741371b10 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -181,7 +181,7 @@ Datetimelike - Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`) - Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) - Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`) -- Bug in :func:`pandas.core.groupby.generic._recast_datetimelike_result` integer indexes for columns referenced by name (:issue:`28247`) +- Bug in :func:`pandas.core.groupby.generic._recast_datetimelike_result` causing ``KeyError`` in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` when a column index in the original DataFrame is datetime like(:issue:`28247`) - Timedelta From 572e53d9aaf730114d749ca6f27e4846a221a591 Mon Sep 17 00:00:00 2001 From: sidharthann Date: Sun, 22 Sep 2019 22:48:17 +0100 Subject: [PATCH 4/9] Better whats new commit message #28569 --- doc/source/whatsnew/v1.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 532e741371b10..6b9bfd3e46781 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -182,7 +182,6 @@ Datetimelike - Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) - Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`) - Bug in :func:`pandas.core.groupby.generic._recast_datetimelike_result` causing ``KeyError`` in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` when a column index in the original DataFrame is datetime like(:issue:`28247`) -- Timedelta ^^^^^^^^^ From 2b6cdcd8c7fdcf522307c9aac5a7744b801062e7 Mon Sep 17 00:00:00 2001 From: sidharthann Date: Sun, 22 Sep 2019 23:36:25 +0100 Subject: [PATCH 5/9] black re-format #28569 --- pandas/core/groupby/generic.py | 7 +++++-- pandas/tests/groupby/test_apply.py | 9 +++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7a6f11667dcc4..a4dc1613d8c80 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1904,8 +1904,11 @@ def _recast_datetimelike_result(result: DataFrame) -> DataFrame: """ result = result.copy() - obj_cols = [idx for idx in range(len(result.columns)) - if is_object_dtype(result.dtypes.iloc[idx])] + obj_cols = [ + idx + for idx in range(len(result.columns)) + if is_object_dtype(result.dtypes.iloc[idx]) + ] # See GH#26285 for n in obj_cols: diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 752a62b3226c5..81f22d581e80a 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -662,9 +662,10 @@ def test_apply_with_mixed_types(): def test_apply_datetime_issue(): # GH-28247 - df = pd.DataFrame({'a': ['foo'], 'b': [datetime.today()]}) - result = df.groupby('a').apply(lambda x: pd.Series(['spam'], index=[42])) + df = pd.DataFrame({"a": ["foo"], "b": [datetime.today()]}) + result = df.groupby("a").apply(lambda x: pd.Series(["spam"], index=[42])) - expected = pd.DataFrame(['spam'], Index(['foo'], dtype='object', name='a'), - columns=[42]) + expected = pd.DataFrame( + ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42] + ) tm.assert_frame_equal(result, expected) From 1fa8a6e575fb16ee9a589bbb5768f509a0eba599 Mon Sep 17 00:00:00 2001 From: sidharthann Date: Tue, 1 Oct 2019 07:10:18 +0100 Subject: [PATCH 6/9] added test cases, fixed spellings #28569 --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/tests/groupby/test_apply.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3f4e3756d64a8..6152500b3957f 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -182,7 +182,7 @@ Datetimelike - Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`) - Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) - Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`) -- Bug in :func:`pandas.core.groupby.generic._recast_datetimelike_result` causing ``KeyError`` in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` when a column index in the original DataFrame is datetime like(:issue:`28247`) +- Bug in :func:`pandas.core.groupby.generic._recast_datetimelike_result` causing ``KeyError`` in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` when a column index in the original DataFrame is datetimelike (:issue:`28247`) Timedelta ^^^^^^^^^ diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 81f22d581e80a..a289745826b22 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -669,3 +669,19 @@ def test_apply_datetime_issue(): ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42] ) tm.assert_frame_equal(result, expected) + + df = pd.DataFrame({"a": ["foo"], "b": [datetime.today().date()]}) + result = df.groupby("a").apply(lambda x: pd.Series(["spam"], index=[42])) + + expected = pd.DataFrame( + ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42] + ) + tm.assert_frame_equal(result, expected) + + df = pd.DataFrame({"a": ["foo"], "b": [datetime.today().time()]}) + result = df.groupby("a").apply(lambda x: pd.Series(["spam"], index=[42])) + + expected = pd.DataFrame( + ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42] + ) + tm.assert_frame_equal(result, expected) From 03d2d0bfd187bb6e22d481e25ca128227e6a789c Mon Sep 17 00:00:00 2001 From: sidharthann Date: Tue, 1 Oct 2019 22:42:34 +0100 Subject: [PATCH 7/9] better docstrings #28569 --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/tests/groupby/test_apply.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 6152500b3957f..08bc333d926db 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -182,7 +182,7 @@ Datetimelike - Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`) - Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) - Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`) -- Bug in :func:`pandas.core.groupby.generic._recast_datetimelike_result` causing ``KeyError`` in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` when a column index in the original DataFrame is datetimelike (:issue:`28247`) +- Bug in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`) Timedelta ^^^^^^^^^ diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index a289745826b22..4d30f8b9fb202 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -661,6 +661,8 @@ def test_apply_with_mixed_types(): def test_apply_datetime_issue(): # GH-28247 + # groupby-apply throws an error if one of the columns in the DataFrame is a datetime object + # and the column labels are different from standard int values in range(len(num_columns)) df = pd.DataFrame({"a": ["foo"], "b": [datetime.today()]}) result = df.groupby("a").apply(lambda x: pd.Series(["spam"], index=[42])) From 0c6060a72393acbbf0818394dc4ce800e10ff10e Mon Sep 17 00:00:00 2001 From: sidharthann Date: Tue, 1 Oct 2019 22:47:39 +0100 Subject: [PATCH 8/9] reduce line length #28569 --- pandas/tests/groupby/test_apply.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 4d30f8b9fb202..e190c10f0c3e5 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -661,8 +661,9 @@ def test_apply_with_mixed_types(): def test_apply_datetime_issue(): # GH-28247 - # groupby-apply throws an error if one of the columns in the DataFrame is a datetime object - # and the column labels are different from standard int values in range(len(num_columns)) + # groupby-apply throws an error if one of the columns in the DataFrame + # is a datetime object and the column labels are different from + # standard int values in range(len(num_columns)) df = pd.DataFrame({"a": ["foo"], "b": [datetime.today()]}) result = df.groupby("a").apply(lambda x: pd.Series(["spam"], index=[42])) From 7132a7bfebc3cc0466e7949a3b2a27e6d0ab9977 Mon Sep 17 00:00:00 2001 From: sidharthann Date: Thu, 3 Oct 2019 06:49:05 +0100 Subject: [PATCH 9/9] parametrize test input #28569 --- pandas/tests/groupby/test_apply.py | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index e190c10f0c3e5..4d0063b773bc5 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -659,29 +659,17 @@ def test_apply_with_mixed_types(): tm.assert_frame_equal(result, expected) -def test_apply_datetime_issue(): +@pytest.mark.parametrize( + "group_column_dtlike", + [datetime.today(), datetime.today().date(), datetime.today().time()], +) +def test_apply_datetime_issue(group_column_dtlike): # GH-28247 # groupby-apply throws an error if one of the columns in the DataFrame # is a datetime object and the column labels are different from # standard int values in range(len(num_columns)) - df = pd.DataFrame({"a": ["foo"], "b": [datetime.today()]}) - result = df.groupby("a").apply(lambda x: pd.Series(["spam"], index=[42])) - - expected = pd.DataFrame( - ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42] - ) - tm.assert_frame_equal(result, expected) - - df = pd.DataFrame({"a": ["foo"], "b": [datetime.today().date()]}) - result = df.groupby("a").apply(lambda x: pd.Series(["spam"], index=[42])) - - expected = pd.DataFrame( - ["spam"], Index(["foo"], dtype="object", name="a"), columns=[42] - ) - tm.assert_frame_equal(result, expected) - - df = pd.DataFrame({"a": ["foo"], "b": [datetime.today().time()]}) + df = pd.DataFrame({"a": ["foo"], "b": [group_column_dtlike]}) result = df.groupby("a").apply(lambda x: pd.Series(["spam"], index=[42])) expected = pd.DataFrame(