pandas-dev · mroeschke · Dec 7, 2022 · Nov 27, 2022 · Nov 27, 2022 · Nov 27, 2022
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -311,6 +311,35 @@ The new behavior, as for datetime64, either gives exactly the requested dtype or
    ser.astype("timedelta64[s]")
    ser.astype("timedelta64[D]")
 
+.. _whatsnew_200.api_breaking.zero_len_indexes:
+
+Empty DataFrames/Series will now default to have a ``RangeIndex``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Before, constructing an empty :class:`Series` or :class:`DataFrame` (where ``data`` is ``None`` or an empty list-like argument) without
+specifying the axes (``index=None``, ``columns=None``) would return the axes as an empty :class:`Index` with object dtype.
+
+Now, the axes are returned as an empty :class:`RangeIndex`.
+
+*Previous behavior*:
+
+.. code-block:: ipython
+
+   In [8]: pd.Series().index
+   Out[8]:
+   Index([], dtype='object')
+
+   In [9]: pd.DataFrame().axes
+   Out[9]:
+   [Index([], dtype='object'), Index([], dtype='object')]
+
+*New behavior*:
+
+.. ipython:: python
+
+   pd.Series().index
+   pd.DataFrame().axes
+
 .. _whatsnew_200.api_breaking.deps:
 
 Increased minimum versions for dependencies
@@ -370,6 +399,7 @@ Other API changes
 - Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`)
 - Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`)
 - Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`)
+- Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex` when no index is specified. Previously the index would be an :class:`Index` with dtype ``object`` if the new DataFrame/Series had length 0 (:issue:`49572`)
 - :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`)
 -
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -632,8 +632,6 @@ def __init__(
         copy: bool | None = None,
     ) -> None:
 
-        if data is None:
-            data = {}
         if dtype is not None:
             dtype = self._validate_dtype(dtype)
 
@@ -671,6 +669,12 @@ def __init__(
             else:
                 copy = False
 
+        if data is None:
+            index = index if index is not None else default_index(0)
+            columns = columns if columns is not None else default_index(0)
+            dtype = dtype if dtype is not None else pandas_dtype(object)
+            data = []
+
         if isinstance(data, (BlockManager, ArrayManager)):
             mgr = self._init_mgr(
                 data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy
@@ -777,7 +781,7 @@ def __init__(
                 mgr = dict_to_mgr(
                     {},
                     index,
-                    columns,
+                    columns if columns is not None else default_index(0),
                     dtype=dtype,
                     typ=manager,
                 )
@@ -2309,8 +2313,7 @@ def maybe_reorder(
 
             result_index = None
             if len(arrays) == 0 and index is None and length == 0:
-                # for backward compat use an object Index instead of RangeIndex
-                result_index = Index([])
+                result_index = default_index(0)
 
             arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns, length)
             return arrays, arr_columns, result_index

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
@@ -582,7 +582,7 @@ def _extract_index(data) -> Index:
     """
     index: Index
     if len(data) == 0:
-        return Index([])
+        return default_index(0)
 
     raw_lengths = []
     indexes: list[list[Hashable] | Index] = []

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -1084,8 +1084,8 @@ def _get_join_info(
             else:
                 join_index = default_index(len(left_indexer))
 
-        if len(join_index) == 0:
-            join_index = join_index.astype(object)
+        if len(join_index) == 0 and not isinstance(join_index, MultiIndex):
+            join_index = default_index(0).set_names(join_index.name)
         return join_index, left_indexer, right_indexer
 
     def _create_join_index(

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -385,11 +385,16 @@ def __init__(
         if index is not None:
             index = ensure_index(index)
 
-        if data is None:
-            data = {}
         if dtype is not None:
             dtype = self._validate_dtype(dtype)
 
+        if data is None:
+            index = index if index is not None else default_index(0)
+            if len(index) or dtype is not None:
+                data = na_value_for_dtype(pandas_dtype(dtype), compat=False)
+            else:
+                data = []
+
         if isinstance(data, MultiIndex):
             raise NotImplementedError(
                 "initializing a Series from a MultiIndex is not supported"

diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py
@@ -30,6 +30,8 @@ def dataframe_from_int_dict(data, frame_template):
             result = DataFrame(data, index=frame_template.index)
             if len(result.columns) > 0:
                 result.columns = frame_template.columns[result.columns]
+            else:
+                result.columns = frame_template.columns.copy()
             return result
 
         results = {}

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -84,6 +84,7 @@
 from pandas.core.indexes.api import (
     Index,
     MultiIndex,
+    default_index,
     ensure_index_from_sequences,
 )
 from pandas.core.series import Series
@@ -1093,8 +1094,9 @@ def _get_empty_meta(
         #
         # Both must be non-null to ensure a successful construction. Otherwise,
         # we have to create a generic empty Index.
+        index: Index
         if (index_col is None or index_col is False) or index_names is None:
-            index = Index([])
+            index = default_index(0)
         else:
             data = [Series([], dtype=dtype_dict[name]) for name in index_names]
             index = ensure_index_from_sequences(data, names=index_names)

diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
@@ -114,14 +114,14 @@ def test_apply_with_reduce_empty():
     result = empty_frame.apply(x.append, axis=1, result_type="expand")
     tm.assert_frame_equal(result, empty_frame)
     result = empty_frame.apply(x.append, axis=1, result_type="reduce")
-    expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64)
+    expected = Series([], dtype=np.float64)
     tm.assert_series_equal(result, expected)
 
     empty_with_cols = DataFrame(columns=["a", "b", "c"])
     result = empty_with_cols.apply(x.append, axis=1, result_type="expand")
     tm.assert_frame_equal(result, empty_with_cols)
     result = empty_with_cols.apply(x.append, axis=1, result_type="reduce")
-    expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64)
+    expected = Series([], dtype=np.float64)
     tm.assert_series_equal(result, expected)
 
     # Ensure that x.append hasn't been called
@@ -147,7 +147,7 @@ def test_nunique_empty():
     tm.assert_series_equal(result, expected)
 
     result = df.T.nunique()
-    expected = Series([], index=pd.Index([]), dtype=np.float64)
+    expected = Series([], dtype=np.float64)
     tm.assert_series_equal(result, expected)
 
 

diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py
@@ -8,7 +8,6 @@
 
 from pandas import (
     DataFrame,
-    Index,
     Series,
 )
 import pandas._testing as tm
@@ -149,8 +148,8 @@ def test_agg_cython_table_series(series, func, expected):
         tm.get_cython_table_params(
             Series(dtype=np.float64),
             [
-                ("cumprod", Series([], Index([]), dtype=np.float64)),
-                ("cumsum", Series([], Index([]), dtype=np.float64)),
+                ("cumprod", Series([], dtype=np.float64)),
+                ("cumsum", Series([], dtype=np.float64)),
             ],
         ),
         tm.get_cython_table_params(

diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py
@@ -119,7 +119,7 @@ def test_construct_empty_dataframe(self, dtype):
         # GH 33623
         result = pd.DataFrame(columns=["a"], dtype=dtype)
         expected = pd.DataFrame(
-            {"a": pd.array([], dtype=dtype)}, index=pd.Index([], dtype="object")
+            {"a": pd.array([], dtype=dtype)}, index=pd.RangeIndex(0)
         )
         self.assert_frame_equal(result, expected)
 

diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py
@@ -55,7 +55,7 @@ def test_dropna_frame(self, data_missing):
 
         # axis = 1
         result = df.dropna(axis="columns")
-        expected = pd.DataFrame(index=[0, 1])
+        expected = pd.DataFrame(index=pd.RangeIndex(2), columns=pd.Index([]))
         self.assert_frame_equal(result, expected)
 
         # multiple

diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py
@@ -84,7 +84,7 @@ def test_xs_corner(self):
         # no columns but Index(dtype=object)
         df = DataFrame(index=["a", "b", "c"])
         result = df.xs("a")
-        expected = Series([], name="a", index=Index([]), dtype=np.float64)
+        expected = Series([], name="a", dtype=np.float64)
         tm.assert_series_equal(result, expected)
 
     def test_xs_duplicates(self):

diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py
@@ -28,7 +28,7 @@ def test_count(self):
 
         df = DataFrame()
         result = df.count()
-        expected = Series(0, index=[])
+        expected = Series(dtype="int64")
         tm.assert_series_equal(result, expected)
 
     def test_count_objects(self, float_string_frame):

diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py
@@ -17,7 +17,7 @@ def test_get_numeric_data_preserve_dtype(self):
         # get the numeric data
         obj = DataFrame({"A": [1, "2", 3.0]})
         result = obj._get_numeric_data()
-        expected = DataFrame(index=[0, 1, 2], dtype=object)
+        expected = DataFrame(dtype=object, index=pd.RangeIndex(3), columns=[])
         tm.assert_frame_equal(result, expected)
 
     def test_get_numeric_data(self):

diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py
@@ -420,7 +420,7 @@ def test_quantile_datetime(self):
         tm.assert_series_equal(result, expected)
 
         result = df[["a", "c"]].quantile([0.5], numeric_only=True)
-        expected = DataFrame(index=[0.5])
+        expected = DataFrame(index=[0.5], columns=[])
         tm.assert_frame_equal(result, expected)
 
     @pytest.mark.parametrize(
@@ -451,7 +451,7 @@ def test_quantile_dt64_empty(self, dtype, interp_method):
             interpolation=interpolation,
             method=method,
         )
-        expected = DataFrame(index=[0.5])
+        expected = DataFrame(index=[0.5], columns=[])
         tm.assert_frame_equal(res, expected)
 
     @pytest.mark.parametrize("invalid", [-1, 2, [0.5, -1], [0.5, 2]])

diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
@@ -483,7 +483,7 @@ def test_rank_object_first(self, frame_or_series, na_option, ascending, expected
         "data,expected",
         [
             ({"a": [1, 2, "a"], "b": [4, 5, 6]}, DataFrame({"b": [1.0, 2.0, 3.0]})),
-            ({"a": [1, 2, "a"]}, DataFrame(index=range(3))),
+            ({"a": [1, 2, "a"]}, DataFrame(index=range(3), columns=[])),
         ],
     )
     def test_rank_mixed_axis_zero(self, data, expected):

diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
@@ -390,7 +390,7 @@ def test_to_csv_dup_cols(self, nrows):
     def test_to_csv_empty(self):
         df = DataFrame(index=np.arange(10))
         result, expected = self._return_result_expected(df, 1000)
-        tm.assert_frame_equal(result, expected, check_names=False)
+        tm.assert_frame_equal(result, expected, check_column_type=False)
 
     @pytest.mark.slow
     def test_to_csv_chunksize(self):

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -193,13 +193,11 @@ def test_series_with_name_not_matching_column(self):
         [
             lambda: DataFrame(),
             lambda: DataFrame(None),
-            lambda: DataFrame({}),
             lambda: DataFrame(()),
             lambda: DataFrame([]),
             lambda: DataFrame(_ for _ in []),
             lambda: DataFrame(range(0)),
             lambda: DataFrame(data=None),
-            lambda: DataFrame(data={}),
             lambda: DataFrame(data=()),
             lambda: DataFrame(data=[]),
             lambda: DataFrame(data=(_ for _ in [])),
@@ -213,6 +211,20 @@ def test_empty_constructor(self, constructor):
         assert len(result.columns) == 0
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize(
+        "constructor",
+        [
+            lambda: DataFrame({}),
+            lambda: DataFrame(data={}),
+        ],
+    )
+    def test_empty_constructor_object_index(self, constructor):
+        expected = DataFrame(columns=Index([]))
+        result = constructor()
+        assert len(result.index) == 0
+        assert len(result.columns) == 0
+        tm.assert_frame_equal(result, expected, check_index_type=True)
+
     @pytest.mark.parametrize(
         "emptylike,expected_index,expected_columns",
         [
@@ -1391,7 +1403,7 @@ def test_constructor_generator(self):
     def test_constructor_list_of_dicts(self):
 
         result = DataFrame([{}])
-        expected = DataFrame(index=[0])
+        expected = DataFrame(index=RangeIndex(1), columns=[])
         tm.assert_frame_equal(result, expected)
 
     def test_constructor_ordered_dict_nested_preserve_order(self):
@@ -1762,7 +1774,7 @@ def test_constructor_empty_with_string_dtype(self):
 
     def test_constructor_empty_with_string_extension(self, nullable_string_dtype):
         # GH 34915
-        expected = DataFrame(index=[], columns=["c1"], dtype=nullable_string_dtype)
+        expected = DataFrame(columns=["c1"], dtype=nullable_string_dtype)
         df = DataFrame(columns=["c1"], dtype=nullable_string_dtype)
         tm.assert_frame_equal(df, expected)
 

diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
@@ -1163,7 +1163,7 @@ def test_any_all_bool_only(self):
         )
 
         result = df.all(bool_only=True)
-        expected = Series(dtype=np.bool_)
+        expected = Series(dtype=np.bool_, index=[])
         tm.assert_series_equal(result, expected)
 
         df = DataFrame(

diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
@@ -1251,7 +1251,8 @@ def test_stack_timezone_aware_values():
 @pytest.mark.parametrize("dropna", [True, False])
 def test_stack_empty_frame(dropna):
     # GH 36113
-    expected = Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64)
+    levels = [np.array([], dtype=np.int64), np.array([], dtype=np.int64)]
+    expected = Series(dtype=np.float64, index=MultiIndex(levels=levels, codes=[[], []]))
     result = DataFrame(dtype=np.float64).stack(dropna=dropna)
     tm.assert_series_equal(result, expected)
 

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -674,7 +674,7 @@ def test_no_args_raises(self):
 
         # but we do allow this
         result = gr.agg([])
-        expected = DataFrame()
+        expected = DataFrame(columns=[])
         tm.assert_frame_equal(result, expected)
 
     def test_series_named_agg_duplicates_no_raises(self):

diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
@@ -103,7 +103,9 @@ def test_cython_agg_nothing_to_agg():
     with pytest.raises(TypeError, match="Could not convert"):
         frame[["b"]].groupby(frame["a"]).mean()
     result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True)
-    expected = DataFrame([], index=frame["a"].sort_values().drop_duplicates())
+    expected = DataFrame(
+        [], index=frame["a"].sort_values().drop_duplicates(), columns=[]
+    )
     tm.assert_frame_equal(result, expected)
 
 

diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
@@ -683,7 +683,7 @@ def test_list_grouper_with_nat(self):
         [
             (
                 "transform",
-                Series(name=2, dtype=np.float64, index=Index([])),
+                Series(name=2, dtype=np.float64),
             ),
             (
                 "agg",
@@ -875,7 +875,7 @@ def test_groupby_with_single_column(self):
         df = DataFrame({"a": list("abssbab")})
         tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]])
         # GH 13530
-        exp = DataFrame(index=Index(["a", "b", "s"], name="a"))
+        exp = DataFrame(index=Index(["a", "b", "s"], name="a"), columns=[])
         tm.assert_frame_equal(df.groupby("a").count(), exp)
         tm.assert_frame_equal(df.groupby("a").sum(), exp)
 

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
@@ -246,7 +246,7 @@ def check(result, expected):
             tm.assert_frame_equal(result, expected)
 
         dfl = DataFrame(np.random.randn(5, 2), columns=list("AB"))
-        check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index))
+        check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index, columns=[]))
         check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
         check(dfl.iloc[4:6], dfl.iloc[[4]])