Skip to content

API: Series/DataFrame from empty dict should have RangeIndex #52426

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v2.0.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ Bug fixes

Other
~~~~~
-
- :class:`DataFrame` created from empty dicts had :attr:`~DataFrame.columns` of dtype ``object``. It is now a :class:`RangeIndex` (:issue:`52404`)
- :class:`Series` created from empty dicts had :attr:`~Series.index` of dtype ``object``. It is now a :class:`RangeIndex` (:issue:`52404`)

.. ---------------------------------------------------------------------------
.. _whatsnew_201.contributors:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ def dict_to_mgr(

else:
keys = list(data.keys())
columns = Index(keys)
columns = Index(keys) if keys else default_index(0)
arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays]

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ def _init_dict(
values = []
keys = index
else:
keys, values = (), []
keys, values = default_index(0), []

# Input is now list-like, so rely on "standard" construction:

Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/constructors/test_from_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
DataFrame,
Index,
MultiIndex,
RangeIndex,
Series,
)
import pandas._testing as tm
Expand Down Expand Up @@ -165,7 +166,7 @@ def test_constructor_from_dict_tuples(self, data_dict, keys, orient):
df = DataFrame.from_dict(data_dict, orient)

result = df.columns
expected = Index(keys, dtype="object", tupleize_cols=False)
expected = Index(keys, tupleize_cols=False) if keys else RangeIndex(0)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any chance we could include this in the parametrisation, instead of introducing an if-else in the test?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that would be better.


tm.assert_index_equal(result, expected)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def test_empty_constructor(self, constructor):
],
)
def test_empty_constructor_object_index(self, constructor):
expected = DataFrame(columns=Index([]))
expected = DataFrame(index=RangeIndex(0), columns=RangeIndex(0))
result = constructor()
assert len(result.index) == 0
assert len(result.columns) == 0
Expand Down
12 changes: 4 additions & 8 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,9 +215,7 @@ def test_roundtrip_empty(self, orient, convert_axes):
idx = pd.Index([], dtype=(float if convert_axes else object))
expected = DataFrame(index=idx, columns=idx)
elif orient in ["index", "columns"]:
# TODO: this condition is probably a bug
idx = pd.Index([], dtype=(float if convert_axes else object))
expected = DataFrame(columns=idx)
expected = DataFrame()
else:
expected = empty_frame.copy()

Expand Down Expand Up @@ -651,11 +649,9 @@ def test_series_roundtrip_empty(self, orient):
data = empty_series.to_json(orient=orient)
result = read_json(data, typ="series", orient=orient)

expected = empty_series
if orient in ("values", "records"):
expected = expected.reset_index(drop=True)
else:
expected.index = expected.index.astype(float)
expected = empty_series.reset_index(drop=True)
if orient in ("split"):
expected.index = expected.index.astype(np.float64)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason to change float to np.float64? (Not an issue, just trying to understand.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No reason, my fingers must have liked np.float64 better than float in this case :-)


tm.assert_series_equal(result, expected)

Expand Down
8 changes: 2 additions & 6 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1203,9 +1203,8 @@ def test_error_on_using_partition_cols_and_partition_on(

def test_empty_dataframe(self, fp):
# GH #27339
df = pd.DataFrame(index=[], columns=[])
df = pd.DataFrame()
expected = df.copy()
expected.index.name = "index"
check_round_trip(df, fp, expected=expected)

def test_timezone_aware_index(self, fp, timezone_aware_date_list):
Expand Down Expand Up @@ -1320,8 +1319,5 @@ def test_invalid_dtype_backend(self, engine):
def test_empty_columns(self, fp):
# GH 52034
df = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name"))
expected = pd.DataFrame(
columns=pd.Index([], dtype=object),
index=pd.Index(["a", "b", "c"], name="custom name"),
)
expected = pd.DataFrame(index=pd.Index(["a", "b", "c"], name="custom name"))
check_round_trip(df, fp, expected=expected)
29 changes: 14 additions & 15 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,35 +93,34 @@ def test_unparsable_strings_with_dt64_dtype(self):
Series(np.array(vals, dtype=object), dtype="datetime64[ns]")

@pytest.mark.parametrize(
"constructor,check_index_type",
"constructor",
[
# NOTE: some overlap with test_constructor_empty but that test does not
# test for None or an empty generator.
# test_constructor_pass_none tests None but only with the index also
# passed.
(lambda idx: Series(index=idx), True),
(lambda idx: Series(None, index=idx), True),
(lambda idx: Series({}, index=idx), False), # creates an Index[object]
(lambda idx: Series((), index=idx), True),
(lambda idx: Series([], index=idx), True),
(lambda idx: Series((_ for _ in []), index=idx), True),
(lambda idx: Series(data=None, index=idx), True),
(lambda idx: Series(data={}, index=idx), False), # creates an Index[object]
(lambda idx: Series(data=(), index=idx), True),
(lambda idx: Series(data=[], index=idx), True),
(lambda idx: Series(data=(_ for _ in []), index=idx), True),
(lambda idx: Series(index=idx)),
(lambda idx: Series(None, index=idx)),
(lambda idx: Series({}, index=idx)),
(lambda idx: Series((), index=idx)),
(lambda idx: Series([], index=idx)),
(lambda idx: Series((_ for _ in []), index=idx)),
(lambda idx: Series(data=None, index=idx)),
(lambda idx: Series(data={}, index=idx)),
(lambda idx: Series(data=(), index=idx)),
(lambda idx: Series(data=[], index=idx)),
(lambda idx: Series(data=(_ for _ in []), index=idx)),
],
)
@pytest.mark.parametrize("empty_index", [None, []])
def test_empty_constructor(self, constructor, check_index_type, empty_index):
# TODO: share with frame test of the same name
def test_empty_constructor(self, constructor, empty_index):
# GH 49573 (addition of empty_index parameter)
expected = Series(index=empty_index)
result = constructor(empty_index)

assert result.dtype == object
assert len(result.index) == 0
tm.assert_series_equal(result, expected, check_index_type=check_index_type)
tm.assert_series_equal(result, expected, check_index_type=True)
Comment on lines +116 to +123
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, lovely simplification here!


def test_invalid_dtype(self):
# GH15520
Expand Down