From 00811a1a50bdd98d5da9816dfbd0ec56722642ca Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 4 Apr 2023 23:07:03 +0100 Subject: [PATCH 1/6] API: Series/DataFrame from empty dict should have RangeIndex --- doc/source/whatsnew/v2.0.1.rst | 3 +- pandas/core/internals/construction.py | 2 +- pandas/core/series.py | 2 +- .../frame/constructors/test_from_dict.py | 3 +- pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/series/test_constructors.py | 29 +++++++++---------- 6 files changed, 21 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index 0122c84ba2a8e..3518c96ce437a 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -27,7 +27,8 @@ Bug fixes Other ~~~~~ -- +- :class:'DataFrame' created from empty dicts had :attr:'~DataFrame.columns' of dtype `object'. It is now a :class'RangeIndex' (:issue:'52404') +- :class:'Series' created from empty dicts had :attr:'~Series.index' of dtype `object'. It is now a :class'RangeIndex' (:issue:'52404') .. --------------------------------------------------------------------------- .. _whatsnew_201.contributors: diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 1108e6051d20b..c8fef14db9341 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -466,7 +466,7 @@ def dict_to_mgr( else: keys = list(data.keys()) - columns = Index(keys) + columns = Index(keys) if keys else default_index(0) arrays = [com.maybe_iterable_to_list(data[k]) for k in keys] arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays] diff --git a/pandas/core/series.py b/pandas/core/series.py index 22c8d8b047280..445d1ec4d66bc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -562,7 +562,7 @@ def _init_dict( values = [] keys = index else: - keys, values = (), [] + keys, values = default_index(0), [] # Input is now list-like, so rely on "standard" construction: diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py index c04213c215f0d..3e0d1db93be03 100644 --- a/pandas/tests/frame/constructors/test_from_dict.py +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -7,6 +7,7 @@ DataFrame, Index, MultiIndex, + RangeIndex, Series, ) import pandas._testing as tm @@ -165,7 +166,7 @@ def test_constructor_from_dict_tuples(self, data_dict, keys, orient): df = DataFrame.from_dict(data_dict, orient) result = df.columns - expected = Index(keys, dtype="object", tupleize_cols=False) + expected = Index(keys, tupleize_cols=False) if keys else RangeIndex(0) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index cb61a68200411..4815fde4d9d6c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -224,7 +224,7 @@ def test_empty_constructor(self, constructor): ], ) def test_empty_constructor_object_index(self, constructor): - expected = DataFrame(columns=Index([])) + expected = DataFrame(index=RangeIndex(0), columns=RangeIndex(0)) result = constructor() assert len(result.index) == 0 assert len(result.columns) == 0 diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index d3cc3239da482..4905c3cf95a47 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -93,35 +93,34 @@ def test_unparsable_strings_with_dt64_dtype(self): Series(np.array(vals, dtype=object), dtype="datetime64[ns]") @pytest.mark.parametrize( - "constructor,check_index_type", + "constructor", [ # NOTE: some overlap with test_constructor_empty but that test does not # test for None or an empty generator. # test_constructor_pass_none tests None but only with the index also # passed. - (lambda idx: Series(index=idx), True), - (lambda idx: Series(None, index=idx), True), - (lambda idx: Series({}, index=idx), False), # creates an Index[object] - (lambda idx: Series((), index=idx), True), - (lambda idx: Series([], index=idx), True), - (lambda idx: Series((_ for _ in []), index=idx), True), - (lambda idx: Series(data=None, index=idx), True), - (lambda idx: Series(data={}, index=idx), False), # creates an Index[object] - (lambda idx: Series(data=(), index=idx), True), - (lambda idx: Series(data=[], index=idx), True), - (lambda idx: Series(data=(_ for _ in []), index=idx), True), + (lambda idx: Series(index=idx)), + (lambda idx: Series(None, index=idx)), + (lambda idx: Series({}, index=idx)), + (lambda idx: Series((), index=idx)), + (lambda idx: Series([], index=idx)), + (lambda idx: Series((_ for _ in []), index=idx)), + (lambda idx: Series(data=None, index=idx)), + (lambda idx: Series(data={}, index=idx)), + (lambda idx: Series(data=(), index=idx)), + (lambda idx: Series(data=[], index=idx)), + (lambda idx: Series(data=(_ for _ in []), index=idx)), ], ) @pytest.mark.parametrize("empty_index", [None, []]) - def test_empty_constructor(self, constructor, check_index_type, empty_index): - # TODO: share with frame test of the same name + def test_empty_constructor(self, constructor, empty_index): # GH 49573 (addition of empty_index parameter) expected = Series(index=empty_index) result = constructor(empty_index) assert result.dtype == object assert len(result.index) == 0 - tm.assert_series_equal(result, expected, check_index_type=check_index_type) + tm.assert_series_equal(result, expected, check_index_type=True) def test_invalid_dtype(self): # GH15520 From e5e86ce832b4bf8d5dd3142f0b9a42eacef3e3ac Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 4 Apr 2023 23:27:45 +0100 Subject: [PATCH 2/6] fix backticks --- doc/source/whatsnew/v2.0.1.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.1.rst b/doc/source/whatsnew/v2.0.1.rst index 3518c96ce437a..179d216c0824b 100644 --- a/doc/source/whatsnew/v2.0.1.rst +++ b/doc/source/whatsnew/v2.0.1.rst @@ -27,8 +27,8 @@ Bug fixes Other ~~~~~ -- :class:'DataFrame' created from empty dicts had :attr:'~DataFrame.columns' of dtype `object'. It is now a :class'RangeIndex' (:issue:'52404') -- :class:'Series' created from empty dicts had :attr:'~Series.index' of dtype `object'. It is now a :class'RangeIndex' (:issue:'52404') +- :class:`DataFrame` created from empty dicts had :attr:`~DataFrame.columns` of dtype ``object``. It is now a :class:`RangeIndex` (:issue:`52404`) +- :class:`Series` created from empty dicts had :attr:`~Series.index` of dtype ``object``. It is now a :class:`RangeIndex` (:issue:`52404`) .. --------------------------------------------------------------------------- .. _whatsnew_201.contributors: From ff795e05b7af9477604c2ee9055e28f2cc813e75 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 5 Apr 2023 00:54:58 +0100 Subject: [PATCH 3/6] fix empty json & parquet --- pandas/tests/io/json/test_pandas.py | 12 ++++-------- pandas/tests/io/test_parquet.py | 8 ++------ 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 08308ebd2f1cf..5fc04509b86b6 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -215,9 +215,7 @@ def test_roundtrip_empty(self, orient, convert_axes): idx = pd.Index([], dtype=(float if convert_axes else object)) expected = DataFrame(index=idx, columns=idx) elif orient in ["index", "columns"]: - # TODO: this condition is probably a bug - idx = pd.Index([], dtype=(float if convert_axes else object)) - expected = DataFrame(columns=idx) + expected = DataFrame() else: expected = empty_frame.copy() @@ -651,11 +649,9 @@ def test_series_roundtrip_empty(self, orient): data = empty_series.to_json(orient=orient) result = read_json(data, typ="series", orient=orient) - expected = empty_series - if orient in ("values", "records"): - expected = expected.reset_index(drop=True) - else: - expected.index = expected.index.astype(float) + expected = empty_series.reset_index(drop=True) + if orient in ("split"): + expected.index = expected.index.astype(np.float64) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index c74548bf63e06..a03e31d71a43b 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1203,9 +1203,8 @@ def test_error_on_using_partition_cols_and_partition_on( def test_empty_dataframe(self, fp): # GH #27339 - df = pd.DataFrame(index=[], columns=[]) + df = pd.DataFrame() expected = df.copy() - expected.index.name = "index" check_round_trip(df, fp, expected=expected) def test_timezone_aware_index(self, fp, timezone_aware_date_list): @@ -1320,8 +1319,5 @@ def test_invalid_dtype_backend(self, engine): def test_empty_columns(self, fp): # GH 52034 df = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name")) - expected = pd.DataFrame( - columns=pd.Index([], dtype=object), - index=pd.Index(["a", "b", "c"], name="custom name"), - ) + expected = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name")) check_round_trip(df, fp, expected=expected) From b4687dfa3a7d51855d37edd1221336ca0b53b2f3 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 5 Apr 2023 14:41:28 +0100 Subject: [PATCH 4/6] update according to comments --- .../frame/constructors/test_from_dict.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py index 3e0d1db93be03..68262d87d5d33 100644 --- a/pandas/tests/frame/constructors/test_from_dict.py +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -153,21 +153,22 @@ def test_from_dict_columns_parameter(self): DataFrame.from_dict({"A": [1, 2], "B": [4, 5]}, columns=["one", "two"]) @pytest.mark.parametrize( - "data_dict, keys, orient", + "data_dict, orient, expected", [ - ({}, [], "index"), - ([{("a",): 1}, {("a",): 2}], [("a",)], "columns"), - ([OrderedDict([(("a",), 1), (("b",), 2)])], [("a",), ("b",)], "columns"), - ([{("a", "b"): 1}], [("a", "b")], "columns"), + ({}, "index", RangeIndex(0)), + ([{("a",): 1}, {("a",): 2}], "columns", Index(("a",), tupleize_cols=False)), + ( + [{("a",): 1, ("b",): 2}], + "columns", + Index([("a",), ("b",)], tupleize_cols=False), + ), + ([{("a", "b"): 1}], Index([("a", "b")], "columns", tupleize_cols=False)), ], ) - def test_constructor_from_dict_tuples(self, data_dict, keys, orient): + def test_constructor_from_dict_tuples(self, data_dict, orient, expected): # GH#16769 df = DataFrame.from_dict(data_dict, orient) - result = df.columns - expected = Index(keys, tupleize_cols=False) if keys else RangeIndex(0) - tm.assert_index_equal(result, expected) def test_frame_dict_constructor_empty_series(self): From 677fb690ab89861b718561ec237c9bf4350769e4 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 5 Apr 2023 15:07:45 +0100 Subject: [PATCH 5/6] fix failure --- pandas/tests/frame/constructors/test_from_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py index 68262d87d5d33..8a4bd463ab1db 100644 --- a/pandas/tests/frame/constructors/test_from_dict.py +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -162,7 +162,7 @@ def test_from_dict_columns_parameter(self): "columns", Index([("a",), ("b",)], tupleize_cols=False), ), - ([{("a", "b"): 1}], Index([("a", "b")], "columns", tupleize_cols=False)), + ([{("a", "b"): 1}], "columns", Index([("a", "b")], tupleize_cols=False)), ], ) def test_constructor_from_dict_tuples(self, data_dict, orient, expected): From 0748063699645f37b793d246cda7e0c723bd2da8 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 5 Apr 2023 16:10:33 +0100 Subject: [PATCH 6/6] fix pre-commit --- pandas/tests/frame/constructors/test_from_dict.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py index 8a4bd463ab1db..d78924ff9d046 100644 --- a/pandas/tests/frame/constructors/test_from_dict.py +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -156,9 +156,13 @@ def test_from_dict_columns_parameter(self): "data_dict, orient, expected", [ ({}, "index", RangeIndex(0)), - ([{("a",): 1}, {("a",): 2}], "columns", Index(("a",), tupleize_cols=False)), ( - [{("a",): 1, ("b",): 2}], + [{("a",): 1}, {("a",): 2}], + "columns", + Index([("a",)], tupleize_cols=False), + ), + ( + [OrderedDict([(("a",), 1), (("b",), 2)])], "columns", Index([("a",), ("b",)], tupleize_cols=False), ),