Skip to content

API: Series/DataFrame from empty dict should have RangeIndex #52426

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v2.0.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ Bug fixes

Other
~~~~~
-
- :class:`DataFrame` created from empty dicts had :attr:`~DataFrame.columns` of dtype ``object``. It is now a :class:`RangeIndex` (:issue:`52404`)
- :class:`Series` created from empty dicts had :attr:`~Series.index` of dtype ``object``. It is now a :class:`RangeIndex` (:issue:`52404`)

.. ---------------------------------------------------------------------------
.. _whatsnew_201.contributors:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ def dict_to_mgr(

else:
keys = list(data.keys())
columns = Index(keys)
columns = Index(keys) if keys else default_index(0)
arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays]

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ def _init_dict(
values = []
keys = index
else:
keys, values = (), []
keys, values = default_index(0), []

# Input is now list-like, so rely on "standard" construction:

Expand Down
24 changes: 15 additions & 9 deletions pandas/tests/frame/constructors/test_from_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
DataFrame,
Index,
MultiIndex,
RangeIndex,
Series,
)
import pandas._testing as tm
Expand Down Expand Up @@ -152,21 +153,26 @@ def test_from_dict_columns_parameter(self):
DataFrame.from_dict({"A": [1, 2], "B": [4, 5]}, columns=["one", "two"])

@pytest.mark.parametrize(
"data_dict, keys, orient",
"data_dict, orient, expected",
[
({}, [], "index"),
([{("a",): 1}, {("a",): 2}], [("a",)], "columns"),
([OrderedDict([(("a",), 1), (("b",), 2)])], [("a",), ("b",)], "columns"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happened to the OrderedDict test case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I've added it back in.

([{("a", "b"): 1}], [("a", "b")], "columns"),
({}, "index", RangeIndex(0)),
(
[{("a",): 1}, {("a",): 2}],
"columns",
Index([("a",)], tupleize_cols=False),
),
(
[OrderedDict([(("a",), 1), (("b",), 2)])],
"columns",
Index([("a",), ("b",)], tupleize_cols=False),
),
([{("a", "b"): 1}], "columns", Index([("a", "b")], tupleize_cols=False)),
],
)
def test_constructor_from_dict_tuples(self, data_dict, keys, orient):
def test_constructor_from_dict_tuples(self, data_dict, orient, expected):
# GH#16769
df = DataFrame.from_dict(data_dict, orient)

result = df.columns
expected = Index(keys, dtype="object", tupleize_cols=False)

tm.assert_index_equal(result, expected)

def test_frame_dict_constructor_empty_series(self):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def test_empty_constructor(self, constructor):
],
)
def test_empty_constructor_object_index(self, constructor):
expected = DataFrame(columns=Index([]))
expected = DataFrame(index=RangeIndex(0), columns=RangeIndex(0))
result = constructor()
assert len(result.index) == 0
assert len(result.columns) == 0
Expand Down
12 changes: 4 additions & 8 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,7 @@ def test_roundtrip_empty(self, orient, convert_axes):
idx = pd.Index([], dtype=(float if convert_axes else object))
expected = DataFrame(index=idx, columns=idx)
elif orient in ["index", "columns"]:
# TODO: this condition is probably a bug
idx = pd.Index([], dtype=(float if convert_axes else object))
expected = DataFrame(columns=idx)
expected = DataFrame()
else:
expected = empty_frame.copy()

Expand Down Expand Up @@ -651,11 +649,9 @@ def test_series_roundtrip_empty(self, orient):
data = empty_series.to_json(orient=orient)
result = read_json(data, typ="series", orient=orient)

expected = empty_series
if orient in ("values", "records"):
expected = expected.reset_index(drop=True)
else:
expected.index = expected.index.astype(float)
expected = empty_series.reset_index(drop=True)
if orient in ("split"):
expected.index = expected.index.astype(np.float64)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason to change float to np.float64? (Not an issue, just trying to understand.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No reason, my fingers must have liked np.float64 better than float in this case :-)


tm.assert_series_equal(result, expected)

Expand Down
8 changes: 2 additions & 6 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1203,9 +1203,8 @@ def test_error_on_using_partition_cols_and_partition_on(

def test_empty_dataframe(self, fp):
# GH #27339
df = pd.DataFrame(index=[], columns=[])
df = pd.DataFrame()
expected = df.copy()
expected.index.name = "index"
check_round_trip(df, fp, expected=expected)

def test_timezone_aware_index(self, fp, timezone_aware_date_list):
Expand Down Expand Up @@ -1320,8 +1319,5 @@ def test_invalid_dtype_backend(self, engine):
def test_empty_columns(self, fp):
# GH 52034
df = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name"))
expected = pd.DataFrame(
columns=pd.Index([], dtype=object),
index=pd.Index(["a", "b", "c"], name="custom name"),
)
expected = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name"))
check_round_trip(df, fp, expected=expected)
29 changes: 14 additions & 15 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,35 +93,34 @@ def test_unparsable_strings_with_dt64_dtype(self):
Series(np.array(vals, dtype=object), dtype="datetime64[ns]")

@pytest.mark.parametrize(
"constructor,check_index_type",
"constructor",
[
# NOTE: some overlap with test_constructor_empty but that test does not
# test for None or an empty generator.
# test_constructor_pass_none tests None but only with the index also
# passed.
(lambda idx: Series(index=idx), True),
(lambda idx: Series(None, index=idx), True),
(lambda idx: Series({}, index=idx), False), # creates an Index[object]
(lambda idx: Series((), index=idx), True),
(lambda idx: Series([], index=idx), True),
(lambda idx: Series((_ for _ in []), index=idx), True),
(lambda idx: Series(data=None, index=idx), True),
(lambda idx: Series(data={}, index=idx), False), # creates an Index[object]
(lambda idx: Series(data=(), index=idx), True),
(lambda idx: Series(data=[], index=idx), True),
(lambda idx: Series(data=(_ for _ in []), index=idx), True),
(lambda idx: Series(index=idx)),
(lambda idx: Series(None, index=idx)),
(lambda idx: Series({}, index=idx)),
(lambda idx: Series((), index=idx)),
(lambda idx: Series([], index=idx)),
(lambda idx: Series((_ for _ in []), index=idx)),
(lambda idx: Series(data=None, index=idx)),
(lambda idx: Series(data={}, index=idx)),
(lambda idx: Series(data=(), index=idx)),
(lambda idx: Series(data=[], index=idx)),
(lambda idx: Series(data=(_ for _ in []), index=idx)),
],
)
@pytest.mark.parametrize("empty_index", [None, []])
def test_empty_constructor(self, constructor, check_index_type, empty_index):
# TODO: share with frame test of the same name
def test_empty_constructor(self, constructor, empty_index):
# GH 49573 (addition of empty_index parameter)
expected = Series(index=empty_index)
result = constructor(empty_index)

assert result.dtype == object
assert len(result.index) == 0
tm.assert_series_equal(result, expected, check_index_type=check_index_type)
tm.assert_series_equal(result, expected, check_index_type=True)
Comment on lines +116 to +123
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, lovely simplification here!


def test_invalid_dtype(self):
# GH15520
Expand Down