diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index b4f027f3a832a..0eab6575d68a9 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -3177,3 +3177,36 @@ def test_frame_allow_non_nano(self, arr):
     def test_frame_from_dict_allow_non_nano(self, arr):
         df = DataFrame({0: arr})
         assert df.dtypes[0] == arr.dtype
+
+
+def test_dtype_warning_on_empty_list_df():
+    # pd.Series([]) without a specified dtype warns the user.
+    expected = pd.DataFrame({"a": pd.Series([]), "b": pd.Series([])})
+
+    # NOTE(review): the comment below says lists do not warn, yet this context
+    # manager expects a FutureWarning from the list-based constructor --
+    # confirm which construction is meant to warn.
+    with tm.assert_produces_warning(FutureWarning):
+        # Lists do not warn the user.
+        result = pd.DataFrame({"a": [], "b": []})
+        tm.assert_frame_equal(result, expected)
+
+
+def test_empty_constructs():
+    # The dtype should be consistent when it is not supplied by the user.
+    result = pd.DataFrame({"a": [], "b": []})
+    expected = pd.DataFrame(columns=["a", "b"])
+
+    tm.assert_frame_equal(result, expected)
+
+
+def test_empty_df_without_column_names():
+    # With data, a list and a Series build equal frames.
+    result_with_data = pd.DataFrame([1, 2, 3])
+    expected_with_data = pd.DataFrame(pd.Series([1, 2, 3]))
+    tm.assert_frame_equal(result_with_data, expected_with_data)
+
+    # The same should hold when the input is empty.
+    result_empty = pd.DataFrame([])
+    expected_empty = pd.DataFrame(pd.Series([]))
+    tm.assert_frame_equal(result_empty, expected_empty)
diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py
index 5a66d13efce65..dfced40fc5b3d 100644
--- a/pandas/tests/groupby/test_apply.py
+++ b/pandas/tests/groupby/test_apply.py
@@ -1331,3 +1331,18 @@ def test_result_name_when_one_group(name):
     expected = Series([1, 2], name=name)
 
     tm.assert_series_equal(result, expected)
+
+
+def test_apply_on_empty_groupby_dataframe():
+    # An identity apply on an empty grouped frame should match the empty slice
+    # of the same apply on the non-empty frame.
+    df = pd.DataFrame([(date.today(), 2, 3)], columns=["date", "a", "b"])
+    df["date"] = pd.to_datetime(df["date"])
+    empty = df[df["b"] == 1]  # no row has b == 1, so this frame is empty
+
+    result = empty.set_index("date").groupby("a", group_keys=True).apply(lambda x: x)
+
+    applied = df.set_index("date").groupby("a", group_keys=True).apply(lambda x: x)
+    expected = applied.iloc[:0]
+
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/groupby/test_sample.py b/pandas/tests/groupby/test_sample.py
index 9153fac0927c5..a68a654d6c136 100644
--- a/pandas/tests/groupby/test_sample.py
+++ b/pandas/tests/groupby/test_sample.py
@@ -142,3 +142,12 @@ def test_groupby_sample_with_selections():
     result = df.groupby("a")[["b", "c"]].sample(n=None, frac=None)
     expected = DataFrame({"b": [1, 2], "c": [1, 2]}, index=result.index)
     tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_sample_with_empty_inputs():
+    # Sampling an empty grouped frame should give back an equally empty frame.
+    df = DataFrame({"a": [], "b": []})
+
+    result = df.groupby("a").sample()
+    expected = df
+    tm.assert_frame_equal(result, expected)