diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 7838ef8df4164..1fc7a06f8b7f0 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -311,6 +311,35 @@ The new behavior, as for datetime64, either gives exactly the requested dtype or ser.astype("timedelta64[s]") ser.astype("timedelta64[D]") +.. _whatsnew_200.api_breaking.zero_len_indexes: + +Empty DataFrames/Series will now default to have a ``RangeIndex`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Before, constructing an empty (where ``data`` is ``None`` or an empty list-like argument) :class:`Series` or :class:`DataFrame` without +specifying the axes (``index=None``, ``columns=None``) would return the axes as empty :class:`Index` with object dtype. + +Now, the axes return an empty :class:`RangeIndex`. + +*Previous behavior*: + +.. code-block:: ipython + + In [8]: pd.Series().index + Out[8]: + Index([], dtype='object') + + In [9]: pd.DataFrame().axes + Out[9]: + [Index([], dtype='object'), Index([], dtype='object')] + +*New behavior*: + +.. ipython:: python + + pd.Series().index + pd.DataFrame().axes + .. _whatsnew_200.api_breaking.deps: Increased minimum versions for dependencies @@ -370,6 +399,7 @@ Other API changes - Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`) - Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`) - Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`) +- Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex`, when no index is specified. 
Previously the index would be a :class:`Index` with dtype ``object`` if the new DataFrame/Series has length 0 (:issue:`49572`) - :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 98af277cc0bd7..37c48bb7adbba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -632,8 +632,6 @@ def __init__( copy: bool | None = None, ) -> None: - if data is None: - data = {} if dtype is not None: dtype = self._validate_dtype(dtype) @@ -671,6 +669,12 @@ def __init__( else: copy = False + if data is None: + index = index if index is not None else default_index(0) + columns = columns if columns is not None else default_index(0) + dtype = dtype if dtype is not None else pandas_dtype(object) + data = [] + if isinstance(data, (BlockManager, ArrayManager)): mgr = self._init_mgr( data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy @@ -777,7 +781,7 @@ def __init__( mgr = dict_to_mgr( {}, index, - columns, + columns if columns is not None else default_index(0), dtype=dtype, typ=manager, ) @@ -2309,8 +2313,7 @@ def maybe_reorder( result_index = None if len(arrays) == 0 and index is None and length == 0: - # for backward compat use an object Index instead of RangeIndex - result_index = Index([]) + result_index = default_index(0) arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns, length) return arrays, arr_columns, result_index diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 563011abe2c41..07fab0080a747 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -582,7 +582,7 @@ def _extract_index(data) -> Index: """ index: Index if len(data) == 0: - return Index([]) + return 
default_index(0) raw_lengths = [] indexes: list[list[Hashable] | Index] = [] diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index c422b5b14cacc..16f1a5d0b81e2 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1084,8 +1084,8 @@ def _get_join_info( else: join_index = default_index(len(left_indexer)) - if len(join_index) == 0: - join_index = join_index.astype(object) + if len(join_index) == 0 and not isinstance(join_index, MultiIndex): + join_index = default_index(0).set_names(join_index.name) return join_index, left_indexer, right_indexer def _create_join_index( diff --git a/pandas/core/series.py b/pandas/core/series.py index 1e5f565934b50..3d2783b939743 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -385,11 +385,16 @@ def __init__( if index is not None: index = ensure_index(index) - if data is None: - data = {} if dtype is not None: dtype = self._validate_dtype(dtype) + if data is None: + index = index if index is not None else default_index(0) + if len(index) or dtype is not None: + data = na_value_for_dtype(pandas_dtype(dtype), compat=False) + else: + data = [] + if isinstance(data, MultiIndex): raise NotImplementedError( "initializing a Series from a MultiIndex is not supported" diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index d6cb3d79c81e4..ddcd114aa352b 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -30,6 +30,8 @@ def dataframe_from_int_dict(data, frame_template): result = DataFrame(data, index=frame_template.index) if len(result.columns) > 0: result.columns = frame_template.columns[result.columns] + else: + result.columns = frame_template.columns.copy() return result results = {} diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index c5fc054952b1f..ff94502d69ca3 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -84,6 +84,7 @@ from pandas.core.indexes.api 
import ( Index, MultiIndex, + default_index, ensure_index_from_sequences, ) from pandas.core.series import Series @@ -1093,8 +1094,9 @@ def _get_empty_meta( # # Both must be non-null to ensure a successful construction. Otherwise, # we have to create a generic empty Index. + index: Index if (index_col is None or index_col is False) or index_names is None: - index = Index([]) + index = default_index(0) else: data = [Series([], dtype=dtype_dict[name]) for name in index_names] index = ensure_index_from_sequences(data, names=index_names) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index e7c2618d388c2..c28c3ae58219a 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -114,14 +114,14 @@ def test_apply_with_reduce_empty(): result = empty_frame.apply(x.append, axis=1, result_type="expand") tm.assert_frame_equal(result, empty_frame) result = empty_frame.apply(x.append, axis=1, result_type="reduce") - expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64) + expected = Series([], dtype=np.float64) tm.assert_series_equal(result, expected) empty_with_cols = DataFrame(columns=["a", "b", "c"]) result = empty_with_cols.apply(x.append, axis=1, result_type="expand") tm.assert_frame_equal(result, empty_with_cols) result = empty_with_cols.apply(x.append, axis=1, result_type="reduce") - expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64) + expected = Series([], dtype=np.float64) tm.assert_series_equal(result, expected) # Ensure that x.append hasn't been called @@ -147,7 +147,7 @@ def test_nunique_empty(): tm.assert_series_equal(result, expected) result = df.T.nunique() - expected = Series([], index=pd.Index([]), dtype=np.float64) + expected = Series([], dtype=np.float64) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index 61c879fb2b20f..add7b5c77ef65 100644 --- 
a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -8,7 +8,6 @@ from pandas import ( DataFrame, - Index, Series, ) import pandas._testing as tm @@ -149,8 +148,8 @@ def test_agg_cython_table_series(series, func, expected): tm.get_cython_table_params( Series(dtype=np.float64), [ - ("cumprod", Series([], Index([]), dtype=np.float64)), - ("cumsum", Series([], Index([]), dtype=np.float64)), + ("cumprod", Series([], dtype=np.float64)), + ("cumsum", Series([], dtype=np.float64)), ], ), tm.get_cython_table_params( diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index b9f8f8512a995..29766ff392296 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -119,7 +119,7 @@ def test_construct_empty_dataframe(self, dtype): # GH 33623 result = pd.DataFrame(columns=["a"], dtype=dtype) expected = pd.DataFrame( - {"a": pd.array([], dtype=dtype)}, index=pd.Index([], dtype="object") + {"a": pd.array([], dtype=dtype)}, index=pd.RangeIndex(0) ) self.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 3d43dc47b5280..cab81f864d8d8 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -55,7 +55,7 @@ def test_dropna_frame(self, data_missing): # axis = 1 result = df.dropna(axis="columns") - expected = pd.DataFrame(index=[0, 1]) + expected = pd.DataFrame(index=pd.RangeIndex(2), columns=pd.Index([])) self.assert_frame_equal(result, expected) # multiple diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index e4a92ecc5dac1..971ce2e467aa9 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -84,7 +84,7 @@ def test_xs_corner(self): # no columns but Index(dtype=object) df = DataFrame(index=["a", "b", "c"]) result = df.xs("a") - expected = Series([], 
name="a", index=Index([]), dtype=np.float64) + expected = Series([], name="a", dtype=np.float64) tm.assert_series_equal(result, expected) def test_xs_duplicates(self): diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py index 43eb96f7f32d9..1553a8a86305d 100644 --- a/pandas/tests/frame/methods/test_count.py +++ b/pandas/tests/frame/methods/test_count.py @@ -28,7 +28,7 @@ def test_count(self): df = DataFrame() result = df.count() - expected = Series(0, index=[]) + expected = Series(dtype="int64") tm.assert_series_equal(result, expected) def test_count_objects(self, float_string_frame): diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py index 8628b76f54b1d..456dfe1075981 100644 --- a/pandas/tests/frame/methods/test_get_numeric_data.py +++ b/pandas/tests/frame/methods/test_get_numeric_data.py @@ -17,7 +17,7 @@ def test_get_numeric_data_preserve_dtype(self): # get the numeric data obj = DataFrame({"A": [1, "2", 3.0]}) result = obj._get_numeric_data() - expected = DataFrame(index=[0, 1, 2], dtype=object) + expected = DataFrame(dtype=object, index=pd.RangeIndex(3), columns=[]) tm.assert_frame_equal(result, expected) def test_get_numeric_data(self): diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 6826b15596850..93e1bcc113765 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -420,7 +420,7 @@ def test_quantile_datetime(self): tm.assert_series_equal(result, expected) result = df[["a", "c"]].quantile([0.5], numeric_only=True) - expected = DataFrame(index=[0.5]) + expected = DataFrame(index=[0.5], columns=[]) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -451,7 +451,7 @@ def test_quantile_dt64_empty(self, dtype, interp_method): interpolation=interpolation, method=method, ) - expected = DataFrame(index=[0.5]) + expected = 
DataFrame(index=[0.5], columns=[]) tm.assert_frame_equal(res, expected) @pytest.mark.parametrize("invalid", [-1, 2, [0.5, -1], [0.5, 2]]) diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index 5f648c76d0aa4..271a32017dd97 100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -483,7 +483,7 @@ def test_rank_object_first(self, frame_or_series, na_option, ascending, expected "data,expected", [ ({"a": [1, 2, "a"], "b": [4, 5, 6]}, DataFrame({"b": [1.0, 2.0, 3.0]})), - ({"a": [1, 2, "a"]}, DataFrame(index=range(3))), + ({"a": [1, 2, "a"]}, DataFrame(index=range(3), columns=[])), ], ) def test_rank_mixed_axis_zero(self, data, expected): diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 7487b2c70a264..638387452903b 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -390,7 +390,7 @@ def test_to_csv_dup_cols(self, nrows): def test_to_csv_empty(self): df = DataFrame(index=np.arange(10)) result, expected = self._return_result_expected(df, 1000) - tm.assert_frame_equal(result, expected, check_names=False) + tm.assert_frame_equal(result, expected, check_column_type=False) @pytest.mark.slow def test_to_csv_chunksize(self): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index cacfd6f7a77b1..8051fff7b329d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -193,13 +193,11 @@ def test_series_with_name_not_matching_column(self): [ lambda: DataFrame(), lambda: DataFrame(None), - lambda: DataFrame({}), lambda: DataFrame(()), lambda: DataFrame([]), lambda: DataFrame(_ for _ in []), lambda: DataFrame(range(0)), lambda: DataFrame(data=None), - lambda: DataFrame(data={}), lambda: DataFrame(data=()), lambda: DataFrame(data=[]), lambda: DataFrame(data=(_ for _ in [])), @@ -213,6 +211,20 @@ def 
test_empty_constructor(self, constructor): assert len(result.columns) == 0 tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "constructor", + [ + lambda: DataFrame({}), + lambda: DataFrame(data={}), + ], + ) + def test_empty_constructor_object_index(self, constructor): + expected = DataFrame(columns=Index([])) + result = constructor() + assert len(result.index) == 0 + assert len(result.columns) == 0 + tm.assert_frame_equal(result, expected, check_index_type=True) + @pytest.mark.parametrize( "emptylike,expected_index,expected_columns", [ @@ -1391,7 +1403,7 @@ def test_constructor_generator(self): def test_constructor_list_of_dicts(self): result = DataFrame([{}]) - expected = DataFrame(index=[0]) + expected = DataFrame(index=RangeIndex(1), columns=[]) tm.assert_frame_equal(result, expected) def test_constructor_ordered_dict_nested_preserve_order(self): @@ -1762,7 +1774,7 @@ def test_constructor_empty_with_string_dtype(self): def test_constructor_empty_with_string_extension(self, nullable_string_dtype): # GH 34915 - expected = DataFrame(index=[], columns=["c1"], dtype=nullable_string_dtype) + expected = DataFrame(columns=["c1"], dtype=nullable_string_dtype) df = DataFrame(columns=["c1"], dtype=nullable_string_dtype) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 6c6a923e363ae..f9f3868375ed5 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1163,7 +1163,7 @@ def test_any_all_bool_only(self): ) result = df.all(bool_only=True) - expected = Series(dtype=np.bool_) + expected = Series(dtype=np.bool_, index=[]) tm.assert_series_equal(result, expected) df = DataFrame( diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index cb796e1b1ec64..f67e2125bbf54 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1251,7 +1251,8 @@ def 
test_stack_timezone_aware_values(): @pytest.mark.parametrize("dropna", [True, False]) def test_stack_empty_frame(dropna): # GH 36113 - expected = Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64) + levels = [np.array([], dtype=np.int64), np.array([], dtype=np.int64)] + expected = Series(dtype=np.float64, index=MultiIndex(levels=levels, codes=[[], []])) result = DataFrame(dtype=np.float64).stack(dropna=dropna) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 03b917edd357b..659703c4d6d8f 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -674,7 +674,7 @@ def test_no_args_raises(self): # but we do allow this result = gr.agg([]) - expected = DataFrame() + expected = DataFrame(columns=[]) tm.assert_frame_equal(result, expected) def test_series_named_agg_duplicates_no_raises(self): diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index dc09a2e0ea6ad..08c25fb74be83 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -103,7 +103,9 @@ def test_cython_agg_nothing_to_agg(): with pytest.raises(TypeError, match="Could not convert"): frame[["b"]].groupby(frame["a"]).mean() result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True) - expected = DataFrame([], index=frame["a"].sort_values().drop_duplicates()) + expected = DataFrame( + [], index=frame["a"].sort_values().drop_duplicates(), columns=[] + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 6b4693b59408d..26cdfa2291021 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -683,7 +683,7 @@ def test_list_grouper_with_nat(self): [ ( "transform", - Series(name=2, dtype=np.float64, 
index=Index([])), + Series(name=2, dtype=np.float64), ), ( "agg", @@ -875,7 +875,7 @@ def test_groupby_with_single_column(self): df = DataFrame({"a": list("abssbab")}) tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]]) # GH 13530 - exp = DataFrame(index=Index(["a", "b", "s"], name="a")) + exp = DataFrame(index=Index(["a", "b", "s"], name="a"), columns=[]) tm.assert_frame_equal(df.groupby("a").count(), exp) tm.assert_frame_equal(df.groupby("a").sum(), exp) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index f7e6665aad253..db088c7a2afea 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -246,7 +246,7 @@ def check(result, expected): tm.assert_frame_equal(result, expected) dfl = DataFrame(np.random.randn(5, 2), columns=list("AB")) - check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) + check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index, columns=[])) check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) check(dfl.iloc[4:6], dfl.iloc[[4]]) diff --git a/pandas/tests/indexing/test_na_indexing.py b/pandas/tests/indexing/test_na_indexing.py index 7e54bbc326880..5364cfe852430 100644 --- a/pandas/tests/indexing/test_na_indexing.py +++ b/pandas/tests/indexing/test_na_indexing.py @@ -34,7 +34,7 @@ def test_series_mask_boolean(values, dtype, mask, indexer_class, frame): if frame: if len(values) == 0: # Otherwise obj is an empty DataFrame with shape (0, 1) - obj = pd.DataFrame(dtype=dtype) + obj = pd.DataFrame(dtype=dtype, index=index) else: obj = obj.to_frame() diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 938056902e745..1ce507db618b9 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -100,12 +100,12 @@ def test_partial_set_empty_frame2(self): tm.assert_frame_equal(df, expected) - df = DataFrame() + df = DataFrame(index=Index([])) df["foo"] = Series(df.index) tm.assert_frame_equal(df, expected) - 
df = DataFrame() + df = DataFrame(index=Index([])) df["foo"] = df.index tm.assert_frame_equal(df, expected) @@ -135,7 +135,7 @@ def test_partial_set_empty_frame4(self): def test_partial_set_empty_frame5(self): df = DataFrame() - tm.assert_index_equal(df.columns, Index([], dtype=object)) + tm.assert_index_equal(df.columns, pd.RangeIndex(0)) df2 = DataFrame() df2[1] = Series([1], index=["foo"]) df.loc[:, 1] = Series([1], index=["foo"]) @@ -182,7 +182,7 @@ def test_partial_set_empty_frame_row(self): df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) y = df[df.A > 5] result = y.reindex(columns=["A", "B", "C"]) - expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64")) + expected = DataFrame(columns=["A", "B", "C"]) expected["A"] = expected["A"].astype("int64") expected["B"] = expected["B"].astype("float64") expected["C"] = expected["C"].astype("float64") diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 822e24b224052..a204132963c94 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1646,7 +1646,7 @@ def test_read_datetime_multiindex(self, request, engine, read_ext): pd.to_datetime("03/01/2020").to_pydatetime(), ], ) - expected = DataFrame([], columns=expected_column_index) + expected = DataFrame([], index=[], columns=expected_column_index) tm.assert_frame_equal(expected, actual) diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index 33c78baa1eedc..e33e1476af69a 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -37,7 +37,7 @@ def test_info_empty(): expected = textwrap.dedent( """\ - Index: 0 entries + RangeIndex: 0 entries Empty DataFrame\n""" ) assert result == expected diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 11ee41ed40ce8..d6999b32e6a81 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ 
b/pandas/tests/io/formats/test_to_latex.py @@ -152,8 +152,8 @@ def test_to_latex_empty_tabular(self): \begin{tabular}{l} \toprule Empty DataFrame - Columns: Index([], dtype='object') - Index: Index([], dtype='object') \\ + Columns: RangeIndex(start=0, stop=0, step=1) + Index: RangeIndex(start=0, stop=0, step=1) \\ \bottomrule \end{tabular} """ @@ -207,8 +207,8 @@ def test_to_latex_empty_longtable(self): \begin{longtable}{l} \toprule Empty DataFrame - Columns: Index([], dtype='object') - Index: Index([], dtype='object') \\ + Columns: RangeIndex(start=0, stop=0, step=1) + Index: RangeIndex(start=0, stop=0, step=1) \\ \end{longtable} """ ) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 2f3fc4d0fcba8..4edd08014050e 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -207,12 +207,15 @@ def test_roundtrip_empty(self, orient, convert_axes): empty_frame = DataFrame() data = empty_frame.to_json(orient=orient) result = read_json(data, orient=orient, convert_axes=convert_axes) - expected = empty_frame.copy() - - # TODO: both conditions below are probably bugs - if convert_axes: - expected.index = expected.index.astype(float) - expected.columns = expected.columns.astype(float) + if orient == "split": + idx = pd.Index([], dtype=(float if convert_axes else object)) + expected = DataFrame(index=idx, columns=idx) + elif orient in ["index", "columns"]: + # TODO: this condition is probably a bug + idx = pd.Index([], dtype=(float if convert_axes else object)) + expected = DataFrame(columns=idx) + else: + expected = empty_frame.copy() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/dtypes/test_empty.py b/pandas/tests/io/parser/dtypes/test_empty.py index ee02af773129a..1f709a3cd8f28 100644 --- a/pandas/tests/io/parser/dtypes/test_empty.py +++ b/pandas/tests/io/parser/dtypes/test_empty.py @@ -26,7 +26,7 @@ def test_dtype_all_columns_empty(all_parsers): parser = all_parsers 
result = parser.read_csv(StringIO("A,B"), dtype=str) - expected = DataFrame({"A": [], "B": []}, index=[], dtype=str) + expected = DataFrame({"A": [], "B": []}, dtype=str) tm.assert_frame_equal(result, expected) @@ -38,7 +38,6 @@ def test_empty_pass_dtype(all_parsers): expected = DataFrame( {"one": np.empty(0, dtype="u1"), "two": np.empty(0, dtype=object)}, - index=Index([], dtype=object), ) tm.assert_frame_equal(result, expected) @@ -81,7 +80,6 @@ def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers): expected = DataFrame( {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, - index=Index([], dtype=object), ) tm.assert_frame_equal(result, expected) @@ -94,7 +92,6 @@ def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers): expected = DataFrame( {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, - index=Index([], dtype=object), ) tm.assert_frame_equal(result, expected) @@ -106,7 +103,6 @@ def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers): [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], axis=1, ) - expected.index = expected.index.astype(object) data = "one,one" result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) @@ -133,11 +129,11 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): (np.float64, DataFrame(columns=["a", "b"], dtype=np.float64)), ( "category", - DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), + DataFrame({"a": Categorical([]), "b": Categorical([])}), ), ( {"a": "category", "b": "category"}, - DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), + DataFrame({"a": Categorical([]), "b": Categorical([])}), ), ("datetime64[ns]", DataFrame(columns=["a", "b"], dtype="datetime64[ns]")), ( @@ -147,28 +143,24 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): "a": Series([], dtype="timedelta64[ns]"), "b": Series([], dtype="timedelta64[ns]"), }, - index=[], ), ), ( 
{"a": np.int64, "b": np.int32}, DataFrame( {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, - index=[], ), ), ( {0: np.int64, 1: np.int32}, DataFrame( {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, - index=[], ), ), ( {"a": np.int64, 1: np.int32}, DataFrame( {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, - index=[], ), ), ], diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index f30aba3db917e..13c4216710f84 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -251,6 +251,7 @@ def test_index_col_multiindex_columns_no_data(all_parsers): ) expected = DataFrame( [], + index=Index([]), columns=MultiIndex.from_arrays( [["a1", "a2"], ["b1", "b2"]], names=["a0", "b0"] ), diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 1a8149ae41fcb..202e26952f590 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1249,7 +1249,7 @@ def test_parse_dates_empty_string(all_parsers): ( "a\n04.15.2016", {"parse_dates": True, "index_col": 0}, - DataFrame(index=DatetimeIndex(["2016-04-15"], name="a")), + DataFrame(index=DatetimeIndex(["2016-04-15"], name="a"), columns=[]), ), ( "a,b\n04.15.2016,09.16.2013", @@ -1264,7 +1264,8 @@ def test_parse_dates_empty_string(all_parsers): DataFrame( index=MultiIndex.from_tuples( [(datetime(2016, 4, 15), datetime(2013, 9, 16))], names=["a", "b"] - ) + ), + columns=[], ), ), ], diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 3e451239dcd40..61c493a2c368f 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -904,7 +904,7 @@ def test_skiprows_with_iterator(): expected_frames = [ DataFrame({"a": [3, 4]}), DataFrame({"a": [5, 7, 8]}, index=[2, 3, 4]), - DataFrame({"a": []}, index=[], 
dtype="object"), + DataFrame({"a": []}, dtype="object"), ] for i, result in enumerate(df_iter): tm.assert_frame_equal(result, expected_frames[i]) diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py index bbf159845b1d6..032cb961103df 100644 --- a/pandas/tests/io/parser/usecols/test_usecols_basic.py +++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -241,7 +241,7 @@ def test_usecols_with_integer_like_header(all_parsers, usecols, expected): def test_empty_usecols(all_parsers): data = "a,b,c\n1,2,3\n4,5,6" - expected = DataFrame() + expected = DataFrame(columns=Index([])) parser = all_parsers result = parser.read_csv(StringIO(data), usecols=set()) @@ -276,7 +276,7 @@ def test_np_array_usecols(all_parsers): } ), ), - (lambda x: False, DataFrame()), + (lambda x: False, DataFrame(columns=Index([]))), ], ) def test_callable_usecols(all_parsers, usecols, expected): diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 75683a1d96bfb..ed72b5e251114 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -899,7 +899,7 @@ def test_partition_cols_pathlib(self, tmp_path, pa, df_compat, path_type): def test_empty_dataframe(self, pa): # GH #27339 - df = pd.DataFrame() + df = pd.DataFrame(index=[], columns=[]) check_round_trip(df, pa) def test_write_with_schema(self, pa): @@ -1174,7 +1174,7 @@ def test_error_on_using_partition_cols_and_partition_on( def test_empty_dataframe(self, fp): # GH #27339 - df = pd.DataFrame() + df = pd.DataFrame(index=[], columns=[]) expected = df.copy() expected.index.name = "index" check_round_trip(df, fp, expected=expected) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index f07a4e3b58e86..3dafe6fe61b35 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -595,5 +595,5 @@ def test_pickle_frame_v124_unpickle_130(): with open(path, "rb") as fd: df = 
pickle.load(fd) - expected = pd.DataFrame() + expected = pd.DataFrame(index=[], columns=[]) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 04f147ee40e62..55e8c4e818ce3 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -90,7 +90,7 @@ def test_raises_on_non_datetimelike_index(): xp = DataFrame() msg = ( "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, " - "but got an instance of 'Index'" + "but got an instance of 'RangeIndex'" ) with pytest.raises(TypeError, match=msg): xp.resample("A").mean() diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index 68220855b3d7a..0d95d94782ecf 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -4,7 +4,7 @@ import pandas as pd from pandas import ( DataFrame, - Index, + RangeIndex, Series, concat, date_range, @@ -52,7 +52,7 @@ def test_concat_empty_series(self): res = concat([s1, s2], axis=1) exp = DataFrame( {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan]}, - index=Index([0, 1, 2], dtype="O"), + index=RangeIndex(3), ) tm.assert_frame_equal(res, exp) @@ -70,7 +70,7 @@ def test_concat_empty_series(self): exp = DataFrame( {"x": [1, 2, 3], 0: [np.nan, np.nan, np.nan]}, columns=["x", 0], - index=Index([0, 1, 2], dtype="O"), + index=RangeIndex(3), ) tm.assert_frame_equal(res, exp) @@ -238,7 +238,7 @@ def test_concat_inner_join_empty(self): # GH 15328 df_empty = DataFrame() df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") - df_expected = DataFrame({"a": []}, index=[], dtype="int64") + df_expected = DataFrame({"a": []}, index=RangeIndex(0), dtype="int64") for how, expected in [("inner", df_expected), ("outer", df_a)]: result = concat([df_a, df_empty], axis=1, join=how) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 4b32022e177e8..e5927aa094193 
100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -951,7 +951,7 @@ def test_join_empty(left_empty, how, exp): expected = DataFrame({"B": [np.nan], "A": [1], "C": [5]}) expected = expected.set_index("A") elif exp == "empty": - expected = DataFrame(index=Index([]), columns=["B", "C"], dtype="int64") + expected = DataFrame(columns=["B", "C"], dtype="int64") if how != "cross": expected = expected.rename_axis("A") diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index f9d4d4fdc19e7..fc2069c5d1e42 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -154,7 +154,7 @@ def test_merge_inner_join_empty(self): df_empty = DataFrame() df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") result = merge(df_empty, df_a, left_index=True, right_index=True) - expected = DataFrame({"a": []}, index=[], dtype="int64") + expected = DataFrame({"a": []}, dtype="int64") tm.assert_frame_equal(result, expected) def test_merge_common(self, df, df2): @@ -461,11 +461,7 @@ def test_merge_left_empty_right_empty(self, join_type, kwarg): left = DataFrame(columns=["a", "b", "c"]) right = DataFrame(columns=["x", "y", "z"]) - exp_in = DataFrame( - columns=["a", "b", "c", "x", "y", "z"], - index=pd.Index([], dtype=object), - dtype=object, - ) + exp_in = DataFrame(columns=["a", "b", "c", "x", "y", "z"], dtype=object) result = merge(left, right, how=join_type, **kwarg) tm.assert_frame_equal(result, exp_in) @@ -487,8 +483,6 @@ def test_merge_left_empty_right_notempty(self): columns=["a", "b", "c", "x", "y", "z"], ) exp_in = exp_out[0:0] # make empty DataFrame keeping dtype - # result will have object dtype - exp_in.index = exp_in.index.astype(object) def check1(exp, kwarg): result = merge(left, right, how="inner", **kwarg) @@ -1672,7 +1666,10 @@ def test_merge_EA_dtype(self, any_numeric_ea_dtype, how, expected_data): d1 = DataFrame([(1,)], 
columns=["id"], dtype=any_numeric_ea_dtype) d2 = DataFrame([(2,)], columns=["id"], dtype=any_numeric_ea_dtype) result = merge(d1, d2, how=how) - expected = DataFrame(expected_data, columns=["id"], dtype=any_numeric_ea_dtype) + exp_index = RangeIndex(len(expected_data)) + expected = DataFrame( + expected_data, index=exp_index, columns=["id"], dtype=any_numeric_ea_dtype + ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -1689,7 +1686,10 @@ def test_merge_string_dtype(self, how, expected_data, any_string_dtype): d1 = DataFrame([("a",)], columns=["id"], dtype=any_string_dtype) d2 = DataFrame([("b",)], columns=["id"], dtype=any_string_dtype) result = merge(d1, d2, how=how) - expected = DataFrame(expected_data, columns=["id"], dtype=any_string_dtype) + exp_idx = RangeIndex(len(expected_data)) + expected = DataFrame( + expected_data, index=exp_idx, columns=["id"], dtype=any_string_dtype + ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 9c1a07dd3cde4..9a72a8dadf8d0 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1934,7 +1934,7 @@ def test_pivot_margins_name_unicode(self): frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek ) index = Index([1, 2, 3, greek], dtype="object", name="foo") - expected = DataFrame(index=index) + expected = DataFrame(index=index, columns=[]) tm.assert_frame_equal(table, expected) def test_pivot_string_as_func(self): @@ -2107,8 +2107,8 @@ def test_pivot_table_empty_aggfunc(self, margins): result = df.pivot_table( index="A", columns="D", values="id", aggfunc=np.size, margins=margins ) - expected = DataFrame(index=Index([], dtype="int64", name="A")) - expected.columns.name = "D" + exp_cols = Index([], name="D") + expected = DataFrame(index=Index([], dtype="int64", name="A"), columns=exp_cols) tm.assert_frame_equal(result, expected) def 
test_pivot_table_no_column_raises(self): @@ -2342,7 +2342,7 @@ def test_pivot_duplicates(self): def test_pivot_empty(self): df = DataFrame(columns=["a", "b", "c"]) result = df.pivot(index="a", columns="b", values="c") - expected = DataFrame() + expected = DataFrame(index=[], columns=[]) tm.assert_frame_equal(result, expected, check_names=False) def test_pivot_integer_bug(self): diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 60ada18410415..698d66ebe7c29 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -97,7 +97,7 @@ def test_reindex_with_datetimes(): def test_reindex_corner(datetime_series): # (don't forget to fix this) I think it's fixed - empty = Series(dtype=object) + empty = Series(index=[]) empty.reindex(datetime_series.index, method="pad") # it works # corner case: pad empty series diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 8b18550dce746..054be774c2308 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -92,15 +92,15 @@ def test_unparseable_strings_with_dt64_dtype(self): # passed. 
(lambda idx: Series(index=idx), True), (lambda idx: Series(None, index=idx), True), - (lambda idx: Series({}, index=idx), True), - (lambda idx: Series((), index=idx), False), # creates a RangeIndex - (lambda idx: Series([], index=idx), False), # creates a RangeIndex - (lambda idx: Series((_ for _ in []), index=idx), False), # RangeIndex + (lambda idx: Series({}, index=idx), False), # creates an Index[object] + (lambda idx: Series((), index=idx), True), + (lambda idx: Series([], index=idx), True), + (lambda idx: Series((_ for _ in []), index=idx), True), (lambda idx: Series(data=None, index=idx), True), - (lambda idx: Series(data={}, index=idx), True), - (lambda idx: Series(data=(), index=idx), False), # creates a RangeIndex - (lambda idx: Series(data=[], index=idx), False), # creates a RangeIndex - (lambda idx: Series(data=(_ for _ in []), index=idx), False), # RangeIndex + (lambda idx: Series(data={}, index=idx), False), # creates an Index[object] + (lambda idx: Series(data=(), index=idx), True), + (lambda idx: Series(data=[], index=idx), True), + (lambda idx: Series(data=(_ for _ in []), index=idx), True), ], ) @pytest.mark.parametrize("empty_index", [None, []]) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index beda123facb26..4385f71dc653f 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -128,7 +128,7 @@ def test_empty_str_methods(any_string_dtype): DataFrame(columns=[0, 1], dtype=any_string_dtype), empty.str.extract("()()", expand=False), ) - tm.assert_frame_equal(empty_df, empty.str.get_dummies()) + tm.assert_frame_equal(empty_df.set_axis([], axis=1), empty.str.get_dummies()) tm.assert_series_equal(empty_str, empty_str.str.join("")) tm.assert_series_equal(empty_int, empty.str.len()) tm.assert_series_equal(empty_object, empty_str.str.findall("a")) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 8417c6dd8419c..d30e3d7afcf19 
100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -453,9 +453,7 @@ def test_moment_functions_zero_length_pairwise(f): df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar")) df2["a"] = df2["a"].astype("float64") - df1_expected = DataFrame( - index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([]) - ) + df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns])) df2_expected = DataFrame( index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]), columns=Index(["a"], name="foo"), diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 04132ced044fc..315b3003f716b 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -197,9 +197,7 @@ def test_moment_functions_zero_length_pairwise(f): df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar")) df2["a"] = df2["a"].astype("float64") - df1_expected = DataFrame( - index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([]) - ) + df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns])) df2_expected = DataFrame( index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]), columns=Index(["a"], name="foo"),