From 46f7d638ad9681aecd1c541750aabd5a6936dab9 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 27 Nov 2022 03:27:12 +0000 Subject: [PATCH 01/10] BUG/API: Indexes on empty frames/series should be RangeIndex, are Index[object] --- pandas/core/frame.py | 12 +++++++---- pandas/core/internals/construction.py | 2 +- pandas/core/reshape/merge.py | 4 ++-- pandas/core/series.py | 9 +++++++-- pandas/io/parsers/base_parser.py | 4 +++- pandas/tests/apply/test_frame_apply.py | 6 +++--- pandas/tests/apply/test_str.py | 5 ++--- pandas/tests/extension/base/constructors.py | 2 +- pandas/tests/extension/base/missing.py | 2 +- pandas/tests/frame/indexing/test_xs.py | 2 +- pandas/tests/frame/methods/test_count.py | 2 +- .../frame/methods/test_get_numeric_data.py | 2 +- pandas/tests/frame/methods/test_quantile.py | 4 ++-- pandas/tests/frame/methods/test_rank.py | 2 +- pandas/tests/frame/methods/test_to_csv.py | 2 +- pandas/tests/frame/test_constructors.py | 20 +++++++++++++++---- pandas/tests/frame/test_reductions.py | 2 +- pandas/tests/frame/test_stack_unstack.py | 3 ++- .../tests/groupby/aggregate/test_aggregate.py | 2 +- pandas/tests/groupby/aggregate/test_cython.py | 4 +++- pandas/tests/groupby/test_grouping.py | 4 ++-- pandas/tests/indexing/test_iloc.py | 2 +- pandas/tests/indexing/test_na_indexing.py | 2 +- pandas/tests/indexing/test_partial.py | 8 ++++---- pandas/tests/io/excel/test_readers.py | 2 +- pandas/tests/io/formats/test_info.py | 2 +- pandas/tests/io/formats/test_to_latex.py | 8 ++++---- pandas/tests/io/json/test_pandas.py | 15 ++++++++------ pandas/tests/io/parser/dtypes/test_empty.py | 14 +++---------- pandas/tests/io/parser/test_index_col.py | 1 + pandas/tests/io/parser/test_parse_dates.py | 5 +++-- pandas/tests/io/parser/test_read_fwf.py | 2 +- .../io/parser/usecols/test_usecols_basic.py | 4 ++-- pandas/tests/io/test_parquet.py | 4 ++-- pandas/tests/io/test_pickle.py | 2 +- pandas/tests/resample/test_base.py | 2 +- pandas/tests/reshape/concat/test_empty.py | 
8 ++++---- pandas/tests/reshape/merge/test_join.py | 2 +- pandas/tests/reshape/merge/test_merge.py | 20 +++++++++---------- pandas/tests/reshape/test_pivot.py | 8 ++++---- pandas/tests/series/methods/test_reindex.py | 2 +- pandas/tests/series/test_constructors.py | 16 +++++++-------- pandas/tests/strings/test_strings.py | 2 +- pandas/tests/window/test_expanding.py | 4 ++-- pandas/tests/window/test_pairwise.py | 4 +--- pandas/tests/window/test_rolling_functions.py | 2 +- 46 files changed, 128 insertions(+), 108 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 98af277cc0bd7..ecb9387f4d705 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -632,8 +632,6 @@ def __init__( copy: bool | None = None, ) -> None: - if data is None: - data = {} if dtype is not None: dtype = self._validate_dtype(dtype) @@ -671,6 +669,12 @@ def __init__( else: copy = False + if data is None: + index = index if index is not None else default_index(0) + columns = columns if columns is not None else default_index(0) + dtype = dtype if dtype is not None else pandas_dtype(object) + data = [] + if isinstance(data, (BlockManager, ArrayManager)): mgr = self._init_mgr( data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy @@ -777,7 +781,7 @@ def __init__( mgr = dict_to_mgr( {}, index, - columns, + columns if columns is not None else default_index(0), dtype=dtype, typ=manager, ) @@ -2310,7 +2314,7 @@ def maybe_reorder( result_index = None if len(arrays) == 0 and index is None and length == 0: # for backward compat use an object Index instead of RangeIndex - result_index = Index([]) + result_index = default_index(0) arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns, length) return arrays, arr_columns, result_index diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 563011abe2c41..07fab0080a747 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py 
@@ -582,7 +582,7 @@ def _extract_index(data) -> Index: """ index: Index if len(data) == 0: - return Index([]) + return default_index(0) raw_lengths = [] indexes: list[list[Hashable] | Index] = [] diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index c422b5b14cacc..16f1a5d0b81e2 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1084,8 +1084,8 @@ def _get_join_info( else: join_index = default_index(len(left_indexer)) - if len(join_index) == 0: - join_index = join_index.astype(object) + if len(join_index) == 0 and not isinstance(join_index, MultiIndex): + join_index = default_index(0).set_names(join_index.name) return join_index, left_indexer, right_indexer def _create_join_index( diff --git a/pandas/core/series.py b/pandas/core/series.py index 1e5f565934b50..3d2783b939743 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -385,11 +385,16 @@ def __init__( if index is not None: index = ensure_index(index) - if data is None: - data = {} if dtype is not None: dtype = self._validate_dtype(dtype) + if data is None: + index = index if index is not None else default_index(0) + if len(index) or dtype is not None: + data = na_value_for_dtype(pandas_dtype(dtype), compat=False) + else: + data = [] + if isinstance(data, MultiIndex): raise NotImplementedError( "initializing a Series from a MultiIndex is not supported" diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index c5fc054952b1f..ff94502d69ca3 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -84,6 +84,7 @@ from pandas.core.indexes.api import ( Index, MultiIndex, + default_index, ensure_index_from_sequences, ) from pandas.core.series import Series @@ -1093,8 +1094,9 @@ def _get_empty_meta( # # Both must be non-null to ensure a successful construction. Otherwise, # we have to create a generic empty Index. 
+ index: Index if (index_col is None or index_col is False) or index_names is None: - index = Index([]) + index = default_index(0) else: data = [Series([], dtype=dtype_dict[name]) for name in index_names] index = ensure_index_from_sequences(data, names=index_names) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index e7c2618d388c2..c28c3ae58219a 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -114,14 +114,14 @@ def test_apply_with_reduce_empty(): result = empty_frame.apply(x.append, axis=1, result_type="expand") tm.assert_frame_equal(result, empty_frame) result = empty_frame.apply(x.append, axis=1, result_type="reduce") - expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64) + expected = Series([], dtype=np.float64) tm.assert_series_equal(result, expected) empty_with_cols = DataFrame(columns=["a", "b", "c"]) result = empty_with_cols.apply(x.append, axis=1, result_type="expand") tm.assert_frame_equal(result, empty_with_cols) result = empty_with_cols.apply(x.append, axis=1, result_type="reduce") - expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64) + expected = Series([], dtype=np.float64) tm.assert_series_equal(result, expected) # Ensure that x.append hasn't been called @@ -147,7 +147,7 @@ def test_nunique_empty(): tm.assert_series_equal(result, expected) result = df.T.nunique() - expected = Series([], index=pd.Index([]), dtype=np.float64) + expected = Series([], dtype=np.float64) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index 61c879fb2b20f..add7b5c77ef65 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -8,7 +8,6 @@ from pandas import ( DataFrame, - Index, Series, ) import pandas._testing as tm @@ -149,8 +148,8 @@ def test_agg_cython_table_series(series, func, expected): tm.get_cython_table_params( 
Series(dtype=np.float64), [ - ("cumprod", Series([], Index([]), dtype=np.float64)), - ("cumsum", Series([], Index([]), dtype=np.float64)), + ("cumprod", Series([], dtype=np.float64)), + ("cumsum", Series([], dtype=np.float64)), ], ), tm.get_cython_table_params( diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index b9f8f8512a995..29766ff392296 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -119,7 +119,7 @@ def test_construct_empty_dataframe(self, dtype): # GH 33623 result = pd.DataFrame(columns=["a"], dtype=dtype) expected = pd.DataFrame( - {"a": pd.array([], dtype=dtype)}, index=pd.Index([], dtype="object") + {"a": pd.array([], dtype=dtype)}, index=pd.RangeIndex(0) ) self.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 3d43dc47b5280..cab81f864d8d8 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -55,7 +55,7 @@ def test_dropna_frame(self, data_missing): # axis = 1 result = df.dropna(axis="columns") - expected = pd.DataFrame(index=[0, 1]) + expected = pd.DataFrame(index=pd.RangeIndex(2), columns=pd.Index([])) self.assert_frame_equal(result, expected) # multiple diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index e4a92ecc5dac1..971ce2e467aa9 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -84,7 +84,7 @@ def test_xs_corner(self): # no columns but Index(dtype=object) df = DataFrame(index=["a", "b", "c"]) result = df.xs("a") - expected = Series([], name="a", index=Index([]), dtype=np.float64) + expected = Series([], name="a", dtype=np.float64) tm.assert_series_equal(result, expected) def test_xs_duplicates(self): diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py index 
43eb96f7f32d9..1553a8a86305d 100644 --- a/pandas/tests/frame/methods/test_count.py +++ b/pandas/tests/frame/methods/test_count.py @@ -28,7 +28,7 @@ def test_count(self): df = DataFrame() result = df.count() - expected = Series(0, index=[]) + expected = Series(dtype="int64") tm.assert_series_equal(result, expected) def test_count_objects(self, float_string_frame): diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py index 8628b76f54b1d..456dfe1075981 100644 --- a/pandas/tests/frame/methods/test_get_numeric_data.py +++ b/pandas/tests/frame/methods/test_get_numeric_data.py @@ -17,7 +17,7 @@ def test_get_numeric_data_preserve_dtype(self): # get the numeric data obj = DataFrame({"A": [1, "2", 3.0]}) result = obj._get_numeric_data() - expected = DataFrame(index=[0, 1, 2], dtype=object) + expected = DataFrame(dtype=object, index=pd.RangeIndex(3), columns=[]) tm.assert_frame_equal(result, expected) def test_get_numeric_data(self): diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 6826b15596850..93e1bcc113765 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -420,7 +420,7 @@ def test_quantile_datetime(self): tm.assert_series_equal(result, expected) result = df[["a", "c"]].quantile([0.5], numeric_only=True) - expected = DataFrame(index=[0.5]) + expected = DataFrame(index=[0.5], columns=[]) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -451,7 +451,7 @@ def test_quantile_dt64_empty(self, dtype, interp_method): interpolation=interpolation, method=method, ) - expected = DataFrame(index=[0.5]) + expected = DataFrame(index=[0.5], columns=[]) tm.assert_frame_equal(res, expected) @pytest.mark.parametrize("invalid", [-1, 2, [0.5, -1], [0.5, 2]]) diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index 5f648c76d0aa4..271a32017dd97 100644 
--- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -483,7 +483,7 @@ def test_rank_object_first(self, frame_or_series, na_option, ascending, expected "data,expected", [ ({"a": [1, 2, "a"], "b": [4, 5, 6]}, DataFrame({"b": [1.0, 2.0, 3.0]})), - ({"a": [1, 2, "a"]}, DataFrame(index=range(3))), + ({"a": [1, 2, "a"]}, DataFrame(index=range(3), columns=[])), ], ) def test_rank_mixed_axis_zero(self, data, expected): diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 7487b2c70a264..638387452903b 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -390,7 +390,7 @@ def test_to_csv_dup_cols(self, nrows): def test_to_csv_empty(self): df = DataFrame(index=np.arange(10)) result, expected = self._return_result_expected(df, 1000) - tm.assert_frame_equal(result, expected, check_names=False) + tm.assert_frame_equal(result, expected, check_column_type=False) @pytest.mark.slow def test_to_csv_chunksize(self): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index cacfd6f7a77b1..8051fff7b329d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -193,13 +193,11 @@ def test_series_with_name_not_matching_column(self): [ lambda: DataFrame(), lambda: DataFrame(None), - lambda: DataFrame({}), lambda: DataFrame(()), lambda: DataFrame([]), lambda: DataFrame(_ for _ in []), lambda: DataFrame(range(0)), lambda: DataFrame(data=None), - lambda: DataFrame(data={}), lambda: DataFrame(data=()), lambda: DataFrame(data=[]), lambda: DataFrame(data=(_ for _ in [])), @@ -213,6 +211,20 @@ def test_empty_constructor(self, constructor): assert len(result.columns) == 0 tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "constructor", + [ + lambda: DataFrame({}), + lambda: DataFrame(data={}), + ], + ) + def test_empty_constructor_object_index(self, 
constructor): + expected = DataFrame(columns=Index([])) + result = constructor() + assert len(result.index) == 0 + assert len(result.columns) == 0 + tm.assert_frame_equal(result, expected, check_index_type=True) + @pytest.mark.parametrize( "emptylike,expected_index,expected_columns", [ @@ -1391,7 +1403,7 @@ def test_constructor_generator(self): def test_constructor_list_of_dicts(self): result = DataFrame([{}]) - expected = DataFrame(index=[0]) + expected = DataFrame(index=RangeIndex(1), columns=[]) tm.assert_frame_equal(result, expected) def test_constructor_ordered_dict_nested_preserve_order(self): @@ -1762,7 +1774,7 @@ def test_constructor_empty_with_string_dtype(self): def test_constructor_empty_with_string_extension(self, nullable_string_dtype): # GH 34915 - expected = DataFrame(index=[], columns=["c1"], dtype=nullable_string_dtype) + expected = DataFrame(columns=["c1"], dtype=nullable_string_dtype) df = DataFrame(columns=["c1"], dtype=nullable_string_dtype) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 6c6a923e363ae..f9f3868375ed5 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1163,7 +1163,7 @@ def test_any_all_bool_only(self): ) result = df.all(bool_only=True) - expected = Series(dtype=np.bool_) + expected = Series(dtype=np.bool_, index=[]) tm.assert_series_equal(result, expected) df = DataFrame( diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index cb796e1b1ec64..f67e2125bbf54 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1251,7 +1251,8 @@ def test_stack_timezone_aware_values(): @pytest.mark.parametrize("dropna", [True, False]) def test_stack_empty_frame(dropna): # GH 36113 - expected = Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64) + levels = [np.array([], dtype=np.int64), np.array([], 
dtype=np.int64)] + expected = Series(dtype=np.float64, index=MultiIndex(levels=levels, codes=[[], []])) result = DataFrame(dtype=np.float64).stack(dropna=dropna) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 03b917edd357b..659703c4d6d8f 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -674,7 +674,7 @@ def test_no_args_raises(self): # but we do allow this result = gr.agg([]) - expected = DataFrame() + expected = DataFrame(columns=[]) tm.assert_frame_equal(result, expected) def test_series_named_agg_duplicates_no_raises(self): diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index dc09a2e0ea6ad..08c25fb74be83 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -103,7 +103,9 @@ def test_cython_agg_nothing_to_agg(): with pytest.raises(TypeError, match="Could not convert"): frame[["b"]].groupby(frame["a"]).mean() result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True) - expected = DataFrame([], index=frame["a"].sort_values().drop_duplicates()) + expected = DataFrame( + [], index=frame["a"].sort_values().drop_duplicates(), columns=[] + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 6b4693b59408d..26cdfa2291021 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -683,7 +683,7 @@ def test_list_grouper_with_nat(self): [ ( "transform", - Series(name=2, dtype=np.float64, index=Index([])), + Series(name=2, dtype=np.float64), ), ( "agg", @@ -875,7 +875,7 @@ def test_groupby_with_single_column(self): df = DataFrame({"a": list("abssbab")}) tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]]) # GH 13530 - exp = 
DataFrame(index=Index(["a", "b", "s"], name="a")) + exp = DataFrame(index=Index(["a", "b", "s"], name="a"), columns=[]) tm.assert_frame_equal(df.groupby("a").count(), exp) tm.assert_frame_equal(df.groupby("a").sum(), exp) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index f7e6665aad253..db088c7a2afea 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -246,7 +246,7 @@ def check(result, expected): tm.assert_frame_equal(result, expected) dfl = DataFrame(np.random.randn(5, 2), columns=list("AB")) - check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) + check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index, columns=[])) check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) check(dfl.iloc[4:6], dfl.iloc[[4]]) diff --git a/pandas/tests/indexing/test_na_indexing.py b/pandas/tests/indexing/test_na_indexing.py index 7e54bbc326880..5364cfe852430 100644 --- a/pandas/tests/indexing/test_na_indexing.py +++ b/pandas/tests/indexing/test_na_indexing.py @@ -34,7 +34,7 @@ def test_series_mask_boolean(values, dtype, mask, indexer_class, frame): if frame: if len(values) == 0: # Otherwise obj is an empty DataFrame with shape (0, 1) - obj = pd.DataFrame(dtype=dtype) + obj = pd.DataFrame(dtype=dtype, index=index) else: obj = obj.to_frame() diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 938056902e745..1ce507db618b9 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -100,12 +100,12 @@ def test_partial_set_empty_frame2(self): tm.assert_frame_equal(df, expected) - df = DataFrame() + df = DataFrame(index=Index([])) df["foo"] = Series(df.index) tm.assert_frame_equal(df, expected) - df = DataFrame() + df = DataFrame(index=Index([])) df["foo"] = df.index tm.assert_frame_equal(df, expected) @@ -135,7 +135,7 @@ def test_partial_set_empty_frame4(self): def test_partial_set_empty_frame5(self): df = DataFrame() - 
tm.assert_index_equal(df.columns, Index([], dtype=object)) + tm.assert_index_equal(df.columns, pd.RangeIndex(0)) df2 = DataFrame() df2[1] = Series([1], index=["foo"]) df.loc[:, 1] = Series([1], index=["foo"]) @@ -182,7 +182,7 @@ def test_partial_set_empty_frame_row(self): df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) y = df[df.A > 5] result = y.reindex(columns=["A", "B", "C"]) - expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64")) + expected = DataFrame(columns=["A", "B", "C"]) expected["A"] = expected["A"].astype("int64") expected["B"] = expected["B"].astype("float64") expected["C"] = expected["C"].astype("float64") diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 822e24b224052..a204132963c94 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1646,7 +1646,7 @@ def test_read_datetime_multiindex(self, request, engine, read_ext): pd.to_datetime("03/01/2020").to_pydatetime(), ], ) - expected = DataFrame([], columns=expected_column_index) + expected = DataFrame([], index=[], columns=expected_column_index) tm.assert_frame_equal(expected, actual) diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index 33c78baa1eedc..e33e1476af69a 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -37,7 +37,7 @@ def test_info_empty(): expected = textwrap.dedent( """\ - Index: 0 entries + RangeIndex: 0 entries Empty DataFrame\n""" ) assert result == expected diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 11ee41ed40ce8..d6999b32e6a81 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -152,8 +152,8 @@ def test_to_latex_empty_tabular(self): \begin{tabular}{l} \toprule Empty DataFrame - Columns: Index([], dtype='object') - Index: Index([], dtype='object') \\ + Columns: 
RangeIndex(start=0, stop=0, step=1) + Index: RangeIndex(start=0, stop=0, step=1) \\ \bottomrule \end{tabular} """ @@ -207,8 +207,8 @@ def test_to_latex_empty_longtable(self): \begin{longtable}{l} \toprule Empty DataFrame - Columns: Index([], dtype='object') - Index: Index([], dtype='object') \\ + Columns: RangeIndex(start=0, stop=0, step=1) + Index: RangeIndex(start=0, stop=0, step=1) \\ \end{longtable} """ ) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 2f3fc4d0fcba8..4edd08014050e 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -207,12 +207,15 @@ def test_roundtrip_empty(self, orient, convert_axes): empty_frame = DataFrame() data = empty_frame.to_json(orient=orient) result = read_json(data, orient=orient, convert_axes=convert_axes) - expected = empty_frame.copy() - - # TODO: both conditions below are probably bugs - if convert_axes: - expected.index = expected.index.astype(float) - expected.columns = expected.columns.astype(float) + if orient == "split": + idx = pd.Index([], dtype=(float if convert_axes else object)) + expected = DataFrame(index=idx, columns=idx) + elif orient in ["index", "columns"]: + # TODO: this condition is probably a bug + idx = pd.Index([], dtype=(float if convert_axes else object)) + expected = DataFrame(columns=idx) + else: + expected = empty_frame.copy() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/dtypes/test_empty.py b/pandas/tests/io/parser/dtypes/test_empty.py index ee02af773129a..1f709a3cd8f28 100644 --- a/pandas/tests/io/parser/dtypes/test_empty.py +++ b/pandas/tests/io/parser/dtypes/test_empty.py @@ -26,7 +26,7 @@ def test_dtype_all_columns_empty(all_parsers): parser = all_parsers result = parser.read_csv(StringIO("A,B"), dtype=str) - expected = DataFrame({"A": [], "B": []}, index=[], dtype=str) + expected = DataFrame({"A": [], "B": []}, dtype=str) tm.assert_frame_equal(result, expected) @@ -38,7 +38,6 @@ 
def test_empty_pass_dtype(all_parsers): expected = DataFrame( {"one": np.empty(0, dtype="u1"), "two": np.empty(0, dtype=object)}, - index=Index([], dtype=object), ) tm.assert_frame_equal(result, expected) @@ -81,7 +80,6 @@ def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers): expected = DataFrame( {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, - index=Index([], dtype=object), ) tm.assert_frame_equal(result, expected) @@ -94,7 +92,6 @@ def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers): expected = DataFrame( {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, - index=Index([], dtype=object), ) tm.assert_frame_equal(result, expected) @@ -106,7 +103,6 @@ def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers): [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], axis=1, ) - expected.index = expected.index.astype(object) data = "one,one" result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) @@ -133,11 +129,11 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): (np.float64, DataFrame(columns=["a", "b"], dtype=np.float64)), ( "category", - DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), + DataFrame({"a": Categorical([]), "b": Categorical([])}), ), ( {"a": "category", "b": "category"}, - DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), + DataFrame({"a": Categorical([]), "b": Categorical([])}), ), ("datetime64[ns]", DataFrame(columns=["a", "b"], dtype="datetime64[ns]")), ( @@ -147,28 +143,24 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): "a": Series([], dtype="timedelta64[ns]"), "b": Series([], dtype="timedelta64[ns]"), }, - index=[], ), ), ( {"a": np.int64, "b": np.int32}, DataFrame( {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, - index=[], ), ), ( {0: np.int64, 1: np.int32}, DataFrame( {"a": Series([], dtype=np.int64), "b": Series([], 
dtype=np.int32)}, - index=[], ), ), ( {"a": np.int64, 1: np.int32}, DataFrame( {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, - index=[], ), ), ], diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index f30aba3db917e..13c4216710f84 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -251,6 +251,7 @@ def test_index_col_multiindex_columns_no_data(all_parsers): ) expected = DataFrame( [], + index=Index([]), columns=MultiIndex.from_arrays( [["a1", "a2"], ["b1", "b2"]], names=["a0", "b0"] ), diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 1a8149ae41fcb..202e26952f590 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1249,7 +1249,7 @@ def test_parse_dates_empty_string(all_parsers): ( "a\n04.15.2016", {"parse_dates": True, "index_col": 0}, - DataFrame(index=DatetimeIndex(["2016-04-15"], name="a")), + DataFrame(index=DatetimeIndex(["2016-04-15"], name="a"), columns=[]), ), ( "a,b\n04.15.2016,09.16.2013", @@ -1264,7 +1264,8 @@ def test_parse_dates_empty_string(all_parsers): DataFrame( index=MultiIndex.from_tuples( [(datetime(2016, 4, 15), datetime(2013, 9, 16))], names=["a", "b"] - ) + ), + columns=[], ), ), ], diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 3e451239dcd40..61c493a2c368f 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -904,7 +904,7 @@ def test_skiprows_with_iterator(): expected_frames = [ DataFrame({"a": [3, 4]}), DataFrame({"a": [5, 7, 8]}, index=[2, 3, 4]), - DataFrame({"a": []}, index=[], dtype="object"), + DataFrame({"a": []}, dtype="object"), ] for i, result in enumerate(df_iter): tm.assert_frame_equal(result, expected_frames[i]) diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py 
b/pandas/tests/io/parser/usecols/test_usecols_basic.py index bbf159845b1d6..032cb961103df 100644 --- a/pandas/tests/io/parser/usecols/test_usecols_basic.py +++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -241,7 +241,7 @@ def test_usecols_with_integer_like_header(all_parsers, usecols, expected): def test_empty_usecols(all_parsers): data = "a,b,c\n1,2,3\n4,5,6" - expected = DataFrame() + expected = DataFrame(columns=Index([])) parser = all_parsers result = parser.read_csv(StringIO(data), usecols=set()) @@ -276,7 +276,7 @@ def test_np_array_usecols(all_parsers): } ), ), - (lambda x: False, DataFrame()), + (lambda x: False, DataFrame(columns=Index([]))), ], ) def test_callable_usecols(all_parsers, usecols, expected): diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 75683a1d96bfb..ed72b5e251114 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -899,7 +899,7 @@ def test_partition_cols_pathlib(self, tmp_path, pa, df_compat, path_type): def test_empty_dataframe(self, pa): # GH #27339 - df = pd.DataFrame() + df = pd.DataFrame(index=[], columns=[]) check_round_trip(df, pa) def test_write_with_schema(self, pa): @@ -1174,7 +1174,7 @@ def test_error_on_using_partition_cols_and_partition_on( def test_empty_dataframe(self, fp): # GH #27339 - df = pd.DataFrame() + df = pd.DataFrame(index=[], columns=[]) expected = df.copy() expected.index.name = "index" check_round_trip(df, fp, expected=expected) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index f07a4e3b58e86..3dafe6fe61b35 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -595,5 +595,5 @@ def test_pickle_frame_v124_unpickle_130(): with open(path, "rb") as fd: df = pickle.load(fd) - expected = pd.DataFrame() + expected = pd.DataFrame(index=[], columns=[]) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 
04f147ee40e62..55e8c4e818ce3 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -90,7 +90,7 @@ def test_raises_on_non_datetimelike_index(): xp = DataFrame() msg = ( "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, " - "but got an instance of 'Index'" + "but got an instance of 'RangeIndex'" ) with pytest.raises(TypeError, match=msg): xp.resample("A").mean() diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index 68220855b3d7a..0d95d94782ecf 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -4,7 +4,7 @@ import pandas as pd from pandas import ( DataFrame, - Index, + RangeIndex, Series, concat, date_range, @@ -52,7 +52,7 @@ def test_concat_empty_series(self): res = concat([s1, s2], axis=1) exp = DataFrame( {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan]}, - index=Index([0, 1, 2], dtype="O"), + index=RangeIndex(3), ) tm.assert_frame_equal(res, exp) @@ -70,7 +70,7 @@ def test_concat_empty_series(self): exp = DataFrame( {"x": [1, 2, 3], 0: [np.nan, np.nan, np.nan]}, columns=["x", 0], - index=Index([0, 1, 2], dtype="O"), + index=RangeIndex(3), ) tm.assert_frame_equal(res, exp) @@ -238,7 +238,7 @@ def test_concat_inner_join_empty(self): # GH 15328 df_empty = DataFrame() df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") - df_expected = DataFrame({"a": []}, index=[], dtype="int64") + df_expected = DataFrame({"a": []}, index=RangeIndex(0), dtype="int64") for how, expected in [("inner", df_expected), ("outer", df_a)]: result = concat([df_a, df_empty], axis=1, join=how) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 4b32022e177e8..e5927aa094193 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -951,7 +951,7 @@ def test_join_empty(left_empty, how, exp): expected = DataFrame({"B": [np.nan], "A": [1], "C": 
[5]}) expected = expected.set_index("A") elif exp == "empty": - expected = DataFrame(index=Index([]), columns=["B", "C"], dtype="int64") + expected = DataFrame(columns=["B", "C"], dtype="int64") if how != "cross": expected = expected.rename_axis("A") diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index f9d4d4fdc19e7..fc2069c5d1e42 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -154,7 +154,7 @@ def test_merge_inner_join_empty(self): df_empty = DataFrame() df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") result = merge(df_empty, df_a, left_index=True, right_index=True) - expected = DataFrame({"a": []}, index=[], dtype="int64") + expected = DataFrame({"a": []}, dtype="int64") tm.assert_frame_equal(result, expected) def test_merge_common(self, df, df2): @@ -461,11 +461,7 @@ def test_merge_left_empty_right_empty(self, join_type, kwarg): left = DataFrame(columns=["a", "b", "c"]) right = DataFrame(columns=["x", "y", "z"]) - exp_in = DataFrame( - columns=["a", "b", "c", "x", "y", "z"], - index=pd.Index([], dtype=object), - dtype=object, - ) + exp_in = DataFrame(columns=["a", "b", "c", "x", "y", "z"], dtype=object) result = merge(left, right, how=join_type, **kwarg) tm.assert_frame_equal(result, exp_in) @@ -487,8 +483,6 @@ def test_merge_left_empty_right_notempty(self): columns=["a", "b", "c", "x", "y", "z"], ) exp_in = exp_out[0:0] # make empty DataFrame keeping dtype - # result will have object dtype - exp_in.index = exp_in.index.astype(object) def check1(exp, kwarg): result = merge(left, right, how="inner", **kwarg) @@ -1672,7 +1666,10 @@ def test_merge_EA_dtype(self, any_numeric_ea_dtype, how, expected_data): d1 = DataFrame([(1,)], columns=["id"], dtype=any_numeric_ea_dtype) d2 = DataFrame([(2,)], columns=["id"], dtype=any_numeric_ea_dtype) result = merge(d1, d2, how=how) - expected = DataFrame(expected_data, columns=["id"], dtype=any_numeric_ea_dtype) 
+ exp_index = RangeIndex(len(expected_data)) + expected = DataFrame( + expected_data, index=exp_index, columns=["id"], dtype=any_numeric_ea_dtype + ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -1689,7 +1686,10 @@ def test_merge_string_dtype(self, how, expected_data, any_string_dtype): d1 = DataFrame([("a",)], columns=["id"], dtype=any_string_dtype) d2 = DataFrame([("b",)], columns=["id"], dtype=any_string_dtype) result = merge(d1, d2, how=how) - expected = DataFrame(expected_data, columns=["id"], dtype=any_string_dtype) + exp_idx = RangeIndex(len(expected_data)) + expected = DataFrame( + expected_data, index=exp_idx, columns=["id"], dtype=any_string_dtype + ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 9c1a07dd3cde4..9a72a8dadf8d0 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1934,7 +1934,7 @@ def test_pivot_margins_name_unicode(self): frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek ) index = Index([1, 2, 3, greek], dtype="object", name="foo") - expected = DataFrame(index=index) + expected = DataFrame(index=index, columns=[]) tm.assert_frame_equal(table, expected) def test_pivot_string_as_func(self): @@ -2107,8 +2107,8 @@ def test_pivot_table_empty_aggfunc(self, margins): result = df.pivot_table( index="A", columns="D", values="id", aggfunc=np.size, margins=margins ) - expected = DataFrame(index=Index([], dtype="int64", name="A")) - expected.columns.name = "D" + exp_cols = Index([], name="D") + expected = DataFrame(index=Index([], dtype="int64", name="A"), columns=exp_cols) tm.assert_frame_equal(result, expected) def test_pivot_table_no_column_raises(self): @@ -2342,7 +2342,7 @@ def test_pivot_duplicates(self): def test_pivot_empty(self): df = DataFrame(columns=["a", "b", "c"]) result = df.pivot(index="a", columns="b", values="c") - expected = DataFrame() + 
expected = DataFrame(index=[], columns=[]) tm.assert_frame_equal(result, expected, check_names=False) def test_pivot_integer_bug(self): diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 60ada18410415..698d66ebe7c29 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -97,7 +97,7 @@ def test_reindex_with_datetimes(): def test_reindex_corner(datetime_series): # (don't forget to fix this) I think it's fixed - empty = Series(dtype=object) + empty = Series(index=[]) empty.reindex(datetime_series.index, method="pad") # it works # corner case: pad empty series diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 8b18550dce746..054be774c2308 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -92,15 +92,15 @@ def test_unparseable_strings_with_dt64_dtype(self): # passed. (lambda idx: Series(index=idx), True), (lambda idx: Series(None, index=idx), True), - (lambda idx: Series({}, index=idx), True), - (lambda idx: Series((), index=idx), False), # creates a RangeIndex - (lambda idx: Series([], index=idx), False), # creates a RangeIndex - (lambda idx: Series((_ for _ in []), index=idx), False), # RangeIndex + (lambda idx: Series({}, index=idx), False), # creates an Index[object] + (lambda idx: Series((), index=idx), True), + (lambda idx: Series([], index=idx), True), + (lambda idx: Series((_ for _ in []), index=idx), True), (lambda idx: Series(data=None, index=idx), True), - (lambda idx: Series(data={}, index=idx), True), - (lambda idx: Series(data=(), index=idx), False), # creates a RangeIndex - (lambda idx: Series(data=[], index=idx), False), # creates a RangeIndex - (lambda idx: Series(data=(_ for _ in []), index=idx), False), # RangeIndex + (lambda idx: Series(data={}, index=idx), False), # creates an Index[object] + (lambda idx: Series(data=(), index=idx), 
True), + (lambda idx: Series(data=[], index=idx), True), + (lambda idx: Series(data=(_ for _ in []), index=idx), True), ], ) @pytest.mark.parametrize("empty_index", [None, []]) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index beda123facb26..4385f71dc653f 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -128,7 +128,7 @@ def test_empty_str_methods(any_string_dtype): DataFrame(columns=[0, 1], dtype=any_string_dtype), empty.str.extract("()()", expand=False), ) - tm.assert_frame_equal(empty_df, empty.str.get_dummies()) + tm.assert_frame_equal(empty_df.set_axis([], axis=1), empty.str.get_dummies()) tm.assert_series_equal(empty_str, empty_str.str.join("")) tm.assert_series_equal(empty_int, empty.str.len()) tm.assert_series_equal(empty_object, empty_str.str.findall("a")) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 8417c6dd8419c..56dbe823a17c4 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -454,7 +454,7 @@ def test_moment_functions_zero_length_pairwise(f): df2["a"] = df2["a"].astype("float64") df1_expected = DataFrame( - index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([]) + index=MultiIndex.from_product([df1.index, df1.columns]) ) df2_expected = DataFrame( index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]), @@ -494,7 +494,7 @@ def test_moment_functions_zero_length(f): s = Series(dtype=np.float64) s_expected = s df1 = DataFrame() - df1_expected = df1 + df1_expected = df1.set_axis([], axis=1) df2 = DataFrame(columns=["a"]) df2["a"] = df2["a"].astype("float64") df2_expected = df2 diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 04132ced044fc..315b3003f716b 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -197,9 +197,7 @@ def 
test_moment_functions_zero_length_pairwise(f): df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar")) df2["a"] = df2["a"].astype("float64") - df1_expected = DataFrame( - index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([]) - ) + df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns])) df2_expected = DataFrame( index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]), columns=Index(["a"], name="foo"), diff --git a/pandas/tests/window/test_rolling_functions.py b/pandas/tests/window/test_rolling_functions.py index bb6faf4f4eb22..72c3db478516c 100644 --- a/pandas/tests/window/test_rolling_functions.py +++ b/pandas/tests/window/test_rolling_functions.py @@ -516,7 +516,7 @@ def test_moment_functions_zero_length(f): s = Series(dtype=np.float64) s_expected = s df1 = DataFrame() - df1_expected = df1 + df1_expected = df1.set_axis([], axis=1) df2 = DataFrame(columns=["a"]) df2["a"] = df2["a"].astype("float64") df2_expected = df2 From 8fea9fac131d71529f97ec51221b0a39cb2b3627 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 27 Nov 2022 03:39:10 +0000 Subject: [PATCH 02/10] fix black --- pandas/tests/window/test_expanding.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 56dbe823a17c4..877f45b1ae1c0 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -453,9 +453,7 @@ def test_moment_functions_zero_length_pairwise(f): df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar")) df2["a"] = df2["a"].astype("float64") - df1_expected = DataFrame( - index=MultiIndex.from_product([df1.index, df1.columns]) - ) + df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns])) df2_expected = DataFrame( index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]), columns=Index(["a"], 
name="foo"), From 2e9ed538d1ff75684e5608dfe9f3016e67d4c9a9 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 27 Nov 2022 09:56:49 +0000 Subject: [PATCH 03/10] fix window stuff --- pandas/core/window/common.py | 2 ++ pandas/tests/window/test_expanding.py | 2 +- pandas/tests/window/test_rolling_functions.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index d6cb3d79c81e4..ddcd114aa352b 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -30,6 +30,8 @@ def dataframe_from_int_dict(data, frame_template): result = DataFrame(data, index=frame_template.index) if len(result.columns) > 0: result.columns = frame_template.columns[result.columns] + else: + result.columns = frame_template.columns.copy() return result results = {} diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 877f45b1ae1c0..d30e3d7afcf19 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -492,7 +492,7 @@ def test_moment_functions_zero_length(f): s = Series(dtype=np.float64) s_expected = s df1 = DataFrame() - df1_expected = df1.set_axis([], axis=1) + df1_expected = df1 df2 = DataFrame(columns=["a"]) df2["a"] = df2["a"].astype("float64") df2_expected = df2 diff --git a/pandas/tests/window/test_rolling_functions.py b/pandas/tests/window/test_rolling_functions.py index 72c3db478516c..bb6faf4f4eb22 100644 --- a/pandas/tests/window/test_rolling_functions.py +++ b/pandas/tests/window/test_rolling_functions.py @@ -516,7 +516,7 @@ def test_moment_functions_zero_length(f): s = Series(dtype=np.float64) s_expected = s df1 = DataFrame() - df1_expected = df1.set_axis([], axis=1) + df1_expected = df1 df2 = DataFrame(columns=["a"]) df2["a"] = df2["a"].astype("float64") df2_expected = df2 From 1461d27049e5572bc06531b26c1e4f1e99ea0208 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sun, 27 Nov 2022 19:40:22 +0000 
Subject: [PATCH 04/10] Add docs --- doc/source/whatsnew/v2.0.0.rst | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 7838ef8df4164..109bb2c0b624e 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -311,6 +311,39 @@ The new behavior, as for datetime64, either gives exactly the requested dtype or ser.astype("timedelta64[s]") ser.astype("timedelta64[D]") +.. _whatsnew_200.api_breaking.zero_len_indexes: + +Empty DataFrames/Series will now default to have a ``RangeIndex`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When users don't specify indexes or columns when instantiating :class:`DataFrame` or :class:`Series` +objects, pandas infers the index type for them. Previously, if it was not possible to +infer the index type from the data, pandas used to infer the index to be a :class:`RangeIndex`, +except when the frame/series was empty, then the index was inferred to be an :class:`Index` +with dtype `object`. + +This has been changed, so the type of index/columns will always be :class:`RangeIndex`, +when users don't declare the index and it's not possible to infer the index type from the data (:issue:`49572`). + +*Previous behavior*: + +.. code-block:: ipython + + In [8]: pd.Series().index + Out[8]: + Index([], dtype='object') + + In [9]: pd.DataFrame().axes + Out[9]: + [Index([], dtype='object'), Index([], dtype='object')] + +*New behavior*: + +.. ipython:: python + + pd.Series().index + pd.DataFrame().axes + .. 
_whatsnew_200.api_breaking.deps: Increased minimum versions for dependencies @@ -370,6 +403,7 @@ Other API changes - Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`) - Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`) - Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`) +- Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex`, when no index is specified. Previously the index would be a :class:`Index` with dtype `object` if the new DataFrame/Series has length 0 (:issue:`49572`) - :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`) - From ba72c340df4ab07f42045dab2771ee0500c5de6d Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Mon, 28 Nov 2022 04:16:33 +0000 Subject: [PATCH 05/10] double ticks --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 109bb2c0b624e..e6dc103a02f29 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -320,7 +320,7 @@ When users don't specify indexes or columns when instantiating :class:`DataFrame objects, pandas infers the index type for them. 
Previously, if it was not possible to infer the index type from the data, pandas used to infer the index to be a :class:`RangeIndex`, except when the frame/series was empty, then the index was inferred to be an :class:`Index` -with dtype `object`. +with dtype ``object``. This has been changed, so the type of index/columns will always be :class:`RangeIndex`, when users don't declare the index and it's not possible to infer the index type from the data (:issue:`49572`). From c02159510993ffbe4bc3a601c1b5a5ed146d06f6 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Mon, 28 Nov 2022 04:20:59 +0000 Subject: [PATCH 06/10] unneeded line --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ecb9387f4d705..37c48bb7adbba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2313,7 +2313,6 @@ def maybe_reorder( result_index = None if len(arrays) == 0 and index is None and length == 0: - # for backward compat use an object Index instead of RangeIndex result_index = default_index(0) arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns, length) From 65147e23508a7a8444d21d61a7d4f8909ceb8be9 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Tue, 29 Nov 2022 08:19:43 +0000 Subject: [PATCH 07/10] update whatsnew text --- doc/source/whatsnew/v2.0.0.rst | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index e6dc103a02f29..42b780d28ac7f 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -316,14 +316,12 @@ The new behavior, as for datetime64, either gives exactly the requested dtype or Empty DataFrames/Series will now default to have a ``RangeIndex`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -When users don't specify indexes or columns when instantiating :class:`DataFrame` or :class:`Series` -objects, pandas infers the index type for them. 
Previously, if it was not possible to -infer the index type from the data, pandas used to infer the index to be a :class:`RangeIndex`, -except when the frame/series was empty, then the index was inferred to be an :class:`Index` -with dtype ``object``. - -This has been changed, so the type of index/columns will always be :class:`RangeIndex`, -when users don't declare the index and it's not possible to infer the index type from the data (:issue:`49572`). +When users instantiate :class:`DataFrame` or :class:`Series` objects without specifying the axes (``index=None`` +and/or ``columns=None``), pandas infers the index type for them. Previously, when instantiating +in such cases, and it was not possible to infer the index type from the passed-in ``data``, +pandas used to infer the index to be a :class:`RangeIndex`, except when constructing an empty (length 0) +``Series`` or ``DataFrame`` without specifying the axes (``index=None``, ``columns=None``) +then it would return the axes as empty Indexes with object dtype. Now, the axes return an ``RangeIndex`` in that case also (:issue:`49572`). 
*Previous behavior*: From 7463c4c31bee1fbcaae77fe826fd52ee45422705 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Wed, 30 Nov 2022 07:34:15 +0000 Subject: [PATCH 08/10] update whatsnew text --- doc/source/whatsnew/v2.0.0.rst | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 42b780d28ac7f..996c567880475 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -316,12 +316,10 @@ The new behavior, as for datetime64, either gives exactly the requested dtype or Empty DataFrames/Series will now default to have a ``RangeIndex`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -When users instantiate :class:`DataFrame` or :class:`Series` objects without specifying the axes (``index=None`` -and/or ``columns=None``), pandas infers the index type for them. Previously, when instantiating -in such cases, and it was not possible to infer the index type from the passed-in ``data``, -pandas used to infer the index to be a :class:`RangeIndex`, except when constructing an empty (length 0) -``Series`` or ``DataFrame`` without specifying the axes (``index=None``, ``columns=None``) -then it would return the axes as empty Indexes with object dtype. Now, the axes return an ``RangeIndex`` in that case also (:issue:`49572`). +Before, constructing an empty (length 0) :class:`Series` or :class:`DataFrame` without +specifying the axes (``index=None``, ``columns=None``) would return the axes as empty :class:`Index` with object dtype. + +Now, the axes return an empty :class:`RangeIndex`. 
*Previous behavior*: From 3f478f4731f08612ef6f6b90c027b3e54127e066 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Fri, 2 Dec 2022 15:29:55 +0000 Subject: [PATCH 09/10] fix rst --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 996c567880475..6bcf74940840b 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -399,7 +399,7 @@ Other API changes - Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`) - Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`) - Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`) -- Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex`, when no index is specified. Previously the index would be a :class:`Index` with dtype `object` if the new DataFrame/Series has length 0 (:issue:`49572`) +- Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex`, when no index is specified. 
Previously the index would be an :class:`Index` with dtype ``object`` if the new DataFrame/Series has length 0 (:issue:`49572`) - :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`) - From 131dfefb4eb7e13e4690b3fb61b93403974f0ef0 Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Sat, 3 Dec 2022 10:03:31 +0000 Subject: [PATCH 10/10] Update doc/source/whatsnew/v2.0.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 6bcf74940840b..1fc7a06f8b7f0 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -316,7 +316,7 @@ The new behavior, as for datetime64, either gives exactly the requested dtype or Empty DataFrames/Series will now default to have a ``RangeIndex`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Before, constructing an empty (length 0) :class:`Series` or :class:`DataFrame` without +Before, constructing an empty (where ``data`` is ``None`` or an empty list-like argument) :class:`Series` or :class:`DataFrame` without specifying the axes (``index=None``, ``columns=None``) would return the axes as empty :class:`Index` with object dtype. Now, the axes return an empty :class:`RangeIndex`.