From c4e83b222df7c89ccc529f92e7487d9eab9b2c1e Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Wed, 31 Aug 2022 13:58:10 +0100 Subject: [PATCH 1/6] add test for inconsistent dataframe representation for empty inputs --- pandas/tests/groupby/test_apply.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 5a66d13efce65..bd4a4fe09fca0 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -2,6 +2,7 @@ date, datetime, ) +import datetime as dt from io import StringIO import numpy as np @@ -1331,3 +1332,16 @@ def test_result_name_when_one_group(name): expected = Series([1, 2], name=name) tm.assert_series_equal(result, expected) + +def test_empty_inputs_with_apply_inconsistency(): + df = pd.DataFrame([(dt.date.today(), 2, 3)], columns=["date", "a", "b"]) + df["date"] = pd.to_datetime(df["date"]) + df = df[df["b"] == 1] # An empty dataframe + result = df.set_index('date').groupby('a', group_keys=True).apply(lambda x:x) + + df2 = pd.DataFrame([(dt.date.today(), 2, 3)], columns=["date", "a", "b"]) + df2["date"] = pd.to_datetime(df2["date"]) + df3 = df2.set_index('date').groupby('a', group_keys=True).apply(lambda x:x) + expected = df3.iloc[:0] # An empty dataframe + + tm.assert_equal(result, expected) \ No newline at end of file From 399e2096d0ef3e6bd8904e44324ed6878de92ab5 Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Wed, 31 Aug 2022 14:04:19 +0100 Subject: [PATCH 2/6] precomit --- pandas/tests/groupby/test_apply.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index bd4a4fe09fca0..cb8e35c4aa215 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1,8 +1,8 @@ +import datetime as dt from datetime import ( date, datetime, ) -import datetime as dt from io import StringIO import numpy as np @@ -1333,15 +1333,16 @@ def test_result_name_when_one_group(name): tm.assert_series_equal(result, expected) + def test_empty_inputs_with_apply_inconsistency(): df = pd.DataFrame([(dt.date.today(), 2, 3)], columns=["date", "a", "b"]) df["date"] = pd.to_datetime(df["date"]) - df = df[df["b"] == 1] # An empty dataframe - result = df.set_index('date').groupby('a', group_keys=True).apply(lambda x:x) - + df = df[df["b"] == 1] # An empty dataframe + result = df.set_index("date").groupby("a", group_keys=True).apply(lambda x: x) + df2 = pd.DataFrame([(dt.date.today(), 2, 3)], columns=["date", "a", "b"]) df2["date"] = pd.to_datetime(df2["date"]) - df3 = df2.set_index('date').groupby('a', group_keys=True).apply(lambda x:x) - expected = df3.iloc[:0] # An empty dataframe + df3 = df2.set_index("date").groupby("a", group_keys=True).apply(lambda x: x) + expected = df3.iloc[:0] # An empty dataframe - tm.assert_equal(result, expected) \ No newline at end of file + tm.assert_equal(result, expected) From 0d7223a716fa9157276c5d9865da39f5f7b504a4 Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Tue, 6 Sep 2022 04:09:48 +0100 Subject: [PATCH 3/6] add test for empty df constructs --- pandas/tests/frame/test_constructors.py | 17 +++++++++++++++++ pandas/tests/groupby/test_apply.py | 11 ++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b4f027f3a832a..cfbfc2816ee9a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -9,6 +9,7 @@ ) import functools import itertools +from nis import match import re import warnings @@ -3177,3 +3178,19 @@ def test_frame_allow_non_nano(self, arr): def test_frame_from_dict_allow_non_nano(self, arr): df = DataFrame({0: arr}) assert df.dtypes[0] == arr.dtype + +def test_dtype_warning_on_empty_list_df(): + # pd.Series([]) without a specified dtype warns the user + expected = pd.DataFrame({"a": pd.Series([]), "b": pd.Series([])}) + + with tm.assert_produces_warning(FutureWarning): + # Lists does not warn the user + result = pd.DataFrame({"a": [], "b": []}) + tm.assert_frame_equal(result, expected) # This is true + +def test_empty_constructs(): + # There should be a consistency for the default dtype when it's not supplied by the user. + result = pd.DataFrame({"a": [], "b": []}) + expected = pd.DataFrame(columns=["a", "b"]) + + tm.assert_frame_equal(result, expected) \ No newline at end of file diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index cb8e35c4aa215..f4697a086e714 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1,4 +1,3 @@ -import datetime as dt from datetime import ( date, datetime, @@ -1334,15 +1333,17 @@ def test_result_name_when_one_group(name): tm.assert_series_equal(result, expected) -def test_empty_inputs_with_apply_inconsistency(): - df = pd.DataFrame([(dt.date.today(), 2, 3)], columns=["date", "a", "b"]) +def test_apply_on_empty_groupby_dataframe(): + df = pd.DataFrame([(date.today(), 2, 3)], columns=["date", "a", "b"]) df["date"] = pd.to_datetime(df["date"]) df = df[df["b"] == 1] # An empty dataframe result = df.set_index("date").groupby("a", group_keys=True).apply(lambda x: x) - df2 = pd.DataFrame([(dt.date.today(), 2, 3)], columns=["date", "a", "b"]) + df2 = pd.DataFrame([(date.today(), 2, 3)], columns=["date", "a", "b"]) df2["date"] = pd.to_datetime(df2["date"]) df3 = df2.set_index("date").groupby("a", group_keys=True).apply(lambda x: x) expected = df3.iloc[:0] # An empty dataframe - tm.assert_equal(result, expected) + tm.assert_frame_equal(result, expected) + + From 8f7bc393cd5595563f8af921c376f9fbd7e6ec37 Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Tue, 6 Sep 2022 04:28:09 +0100 Subject: [PATCH 4/6] precommit --- pandas/tests/frame/test_constructors.py | 7 ++++--- pandas/tests/groupby/test_apply.py | 2 -- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index cfbfc2816ee9a..1aaeebd5c9103 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -9,7 +9,6 @@ ) import functools import itertools -from nis import match import re import warnings @@ -3179,6 +3178,7 @@ def test_frame_from_dict_allow_non_nano(self, arr): df = DataFrame({0: arr}) assert df.dtypes[0] == arr.dtype + def test_dtype_warning_on_empty_list_df(): # pd.Series([]) without a specified dtype warns the user expected = pd.DataFrame({"a": pd.Series([]), "b": pd.Series([])}) @@ -3186,11 +3186,12 @@ def test_dtype_warning_on_empty_list_df(): with tm.assert_produces_warning(FutureWarning): # Lists does not warn the user result = pd.DataFrame({"a": [], "b": []}) - tm.assert_frame_equal(result, expected) # This is true + tm.assert_frame_equal(result, expected) # This is true + def test_empty_constructs(): # There should be a consistency for the default dtype when it's not supplied by the user. result = pd.DataFrame({"a": [], "b": []}) expected = pd.DataFrame(columns=["a", "b"]) - tm.assert_frame_equal(result, expected) \ No newline at end of file + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index f4697a086e714..dfced40fc5b3d 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1345,5 +1345,3 @@ def test_apply_on_empty_groupby_dataframe(): expected = df3.iloc[:0] # An empty dataframe tm.assert_frame_equal(result, expected) - - From ea79bd98504134312da08851a663b5bbbe74f82e Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Tue, 6 Sep 2022 04:59:23 +0100 Subject: [PATCH 5/6] add test for empty df without column name --- pandas/tests/frame/test_constructors.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 1aaeebd5c9103..0eab6575d68a9 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3190,8 +3190,22 @@ def test_dtype_warning_on_empty_list_df(): def test_empty_constructs(): - # There should be a consistency for the default dtype when it's not supplied by the user. + # There should be a consistency for dtype when it's not supplied by the user result = pd.DataFrame({"a": [], "b": []}) expected = pd.DataFrame(columns=["a", "b"]) tm.assert_frame_equal(result, expected) + + +def test_empty_df_without_column_names(): + # Given + result_with_data = pd.DataFrame([1, 2, 3]) + expected_with_data = pd.DataFrame(pd.Series([1, 2, 3])) + # Then + tm.assert_frame_equal(result_with_data, expected_with_data) # True + + # But when it's empty + result_empty = pd.DataFrame([]) + expected_empty = pd.DataFrame(pd.Series([])) + + tm.assert_frame_equal(result_empty, expected_empty) From 5c945d5ba090574203d898b8cdb10c97f0f26547 Mon Sep 17 00:00:00 2001 From: th3nn3ss Date: Thu, 8 Sep 2022 08:55:23 +0100 Subject: [PATCH 6/6] add test for groupby sample on empty inputs --- pandas/tests/groupby/test_sample.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/groupby/test_sample.py b/pandas/tests/groupby/test_sample.py index 9153fac0927c5..a68a654d6c136 100644 --- a/pandas/tests/groupby/test_sample.py +++ b/pandas/tests/groupby/test_sample.py @@ -142,3 +142,12 @@ def test_groupby_sample_with_selections(): result = df.groupby("a")[["b", "c"]].sample(n=None, frac=None) expected = DataFrame({"b": [1, 2], "c": [1, 2]}, index=result.index) tm.assert_frame_equal(result, expected) + + +def test_groupby_sample_with_empty_inputs(): + df = DataFrame({"a": [], "b": []}) + + gb_df = df.groupby("a").sample() + result = gb_df.empty + expected = True + assert result == expected