Skip to content

Commit 865c0d6

Browse files
Backport PR #52426 on branch 2.0.x (API: Series/DataFrame from empty dict should have RangeIndex) (#52578)
Backport PR #52426: API: Series/DataFrame from empty dict should have RangeIndex

Co-authored-by: Terji Petersen <[email protected]>
1 parent bedfb57 commit 865c0d6

File tree

8 files changed

+40
-42
lines changed

8 files changed

+40
-42
lines changed

doc/source/whatsnew/v2.0.1.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ Bug fixes
3232

3333
Other
3434
~~~~~
35-
-
35+
- :class:`DataFrame` created from empty dicts had :attr:`~DataFrame.columns` of dtype ``object``. It is now a :class:`RangeIndex` (:issue:`52404`)
36+
- :class:`Series` created from empty dicts had :attr:`~Series.index` of dtype ``object``. It is now a :class:`RangeIndex` (:issue:`52404`)
3637

3738
.. ---------------------------------------------------------------------------
3839
.. _whatsnew_201.contributors:

pandas/core/internals/construction.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ def dict_to_mgr(
465465

466466
else:
467467
keys = list(data.keys())
468-
columns = Index(keys)
468+
columns = Index(keys) if keys else default_index(0)
469469
arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
470470
arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays]
471471

pandas/core/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -559,7 +559,7 @@ def _init_dict(
559559
values = []
560560
keys = index
561561
else:
562-
keys, values = (), []
562+
keys, values = default_index(0), []
563563

564564
# Input is now list-like, so rely on "standard" construction:
565565
s = Series(values, index=keys, dtype=dtype)

pandas/tests/frame/constructors/test_from_dict.py

+15-9
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
DataFrame,
88
Index,
99
MultiIndex,
10+
RangeIndex,
1011
Series,
1112
)
1213
import pandas._testing as tm
@@ -152,21 +153,26 @@ def test_from_dict_columns_parameter(self):
152153
DataFrame.from_dict({"A": [1, 2], "B": [4, 5]}, columns=["one", "two"])
153154

154155
@pytest.mark.parametrize(
155-
"data_dict, keys, orient",
156+
"data_dict, orient, expected",
156157
[
157-
({}, [], "index"),
158-
([{("a",): 1}, {("a",): 2}], [("a",)], "columns"),
159-
([OrderedDict([(("a",), 1), (("b",), 2)])], [("a",), ("b",)], "columns"),
160-
([{("a", "b"): 1}], [("a", "b")], "columns"),
158+
({}, "index", RangeIndex(0)),
159+
(
160+
[{("a",): 1}, {("a",): 2}],
161+
"columns",
162+
Index([("a",)], tupleize_cols=False),
163+
),
164+
(
165+
[OrderedDict([(("a",), 1), (("b",), 2)])],
166+
"columns",
167+
Index([("a",), ("b",)], tupleize_cols=False),
168+
),
169+
([{("a", "b"): 1}], "columns", Index([("a", "b")], tupleize_cols=False)),
161170
],
162171
)
163-
def test_constructor_from_dict_tuples(self, data_dict, keys, orient):
172+
def test_constructor_from_dict_tuples(self, data_dict, orient, expected):
164173
# GH#16769
165174
df = DataFrame.from_dict(data_dict, orient)
166-
167175
result = df.columns
168-
expected = Index(keys, dtype="object", tupleize_cols=False)
169-
170176
tm.assert_index_equal(result, expected)
171177

172178
def test_frame_dict_constructor_empty_series(self):

pandas/tests/frame/test_constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ def test_empty_constructor(self, constructor):
224224
],
225225
)
226226
def test_empty_constructor_object_index(self, constructor):
227-
expected = DataFrame(columns=Index([]))
227+
expected = DataFrame(index=RangeIndex(0), columns=RangeIndex(0))
228228
result = constructor()
229229
assert len(result.index) == 0
230230
assert len(result.columns) == 0

pandas/tests/io/json/test_pandas.py

+4-8
Original file line numberDiff line numberDiff line change
@@ -215,9 +215,7 @@ def test_roundtrip_empty(self, orient, convert_axes):
215215
idx = pd.Index([], dtype=(float if convert_axes else object))
216216
expected = DataFrame(index=idx, columns=idx)
217217
elif orient in ["index", "columns"]:
218-
# TODO: this condition is probably a bug
219-
idx = pd.Index([], dtype=(float if convert_axes else object))
220-
expected = DataFrame(columns=idx)
218+
expected = DataFrame()
221219
else:
222220
expected = empty_frame.copy()
223221

@@ -651,11 +649,9 @@ def test_series_roundtrip_empty(self, orient):
651649
data = empty_series.to_json(orient=orient)
652650
result = read_json(data, typ="series", orient=orient)
653651

654-
expected = empty_series
655-
if orient in ("values", "records"):
656-
expected = expected.reset_index(drop=True)
657-
else:
658-
expected.index = expected.index.astype(float)
652+
expected = empty_series.reset_index(drop=True)
653+
if orient in ("split"):
654+
expected.index = expected.index.astype(np.float64)
659655

660656
tm.assert_series_equal(result, expected)
661657

pandas/tests/io/test_parquet.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -1203,9 +1203,8 @@ def test_error_on_using_partition_cols_and_partition_on(
12031203

12041204
def test_empty_dataframe(self, fp):
12051205
# GH #27339
1206-
df = pd.DataFrame(index=[], columns=[])
1206+
df = pd.DataFrame()
12071207
expected = df.copy()
1208-
expected.index.name = "index"
12091208
check_round_trip(df, fp, expected=expected)
12101209

12111210
def test_timezone_aware_index(self, fp, timezone_aware_date_list):
@@ -1260,8 +1259,5 @@ def test_invalid_dtype_backend(self, engine):
12601259
def test_empty_columns(self, fp):
12611260
# GH 52034
12621261
df = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name"))
1263-
expected = pd.DataFrame(
1264-
columns=pd.Index([], dtype=object),
1265-
index=pd.Index(["a", "b", "c"], name="custom name"),
1266-
)
1262+
expected = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name"))
12671263
check_round_trip(df, fp, expected=expected)

pandas/tests/series/test_constructors.py

+14-15
Original file line numberDiff line numberDiff line change
@@ -93,35 +93,34 @@ def test_unparseable_strings_with_dt64_dtype(self):
9393
Series(np.array(vals, dtype=object), dtype="datetime64[ns]")
9494

9595
@pytest.mark.parametrize(
96-
"constructor,check_index_type",
96+
"constructor",
9797
[
9898
# NOTE: some overlap with test_constructor_empty but that test does not
9999
# test for None or an empty generator.
100100
# test_constructor_pass_none tests None but only with the index also
101101
# passed.
102-
(lambda idx: Series(index=idx), True),
103-
(lambda idx: Series(None, index=idx), True),
104-
(lambda idx: Series({}, index=idx), False), # creates an Index[object]
105-
(lambda idx: Series((), index=idx), True),
106-
(lambda idx: Series([], index=idx), True),
107-
(lambda idx: Series((_ for _ in []), index=idx), True),
108-
(lambda idx: Series(data=None, index=idx), True),
109-
(lambda idx: Series(data={}, index=idx), False), # creates an Index[object]
110-
(lambda idx: Series(data=(), index=idx), True),
111-
(lambda idx: Series(data=[], index=idx), True),
112-
(lambda idx: Series(data=(_ for _ in []), index=idx), True),
102+
(lambda idx: Series(index=idx)),
103+
(lambda idx: Series(None, index=idx)),
104+
(lambda idx: Series({}, index=idx)),
105+
(lambda idx: Series((), index=idx)),
106+
(lambda idx: Series([], index=idx)),
107+
(lambda idx: Series((_ for _ in []), index=idx)),
108+
(lambda idx: Series(data=None, index=idx)),
109+
(lambda idx: Series(data={}, index=idx)),
110+
(lambda idx: Series(data=(), index=idx)),
111+
(lambda idx: Series(data=[], index=idx)),
112+
(lambda idx: Series(data=(_ for _ in []), index=idx)),
113113
],
114114
)
115115
@pytest.mark.parametrize("empty_index", [None, []])
116-
def test_empty_constructor(self, constructor, check_index_type, empty_index):
117-
# TODO: share with frame test of the same name
116+
def test_empty_constructor(self, constructor, empty_index):
118117
# GH 49573 (addition of empty_index parameter)
119118
expected = Series(index=empty_index)
120119
result = constructor(empty_index)
121120

122121
assert result.dtype == object
123122
assert len(result.index) == 0
124-
tm.assert_series_equal(result, expected, check_index_type=check_index_type)
123+
tm.assert_series_equal(result, expected, check_index_type=True)
125124

126125
def test_invalid_dtype(self):
127126
# GH15520

0 commit comments

Comments
 (0)