Skip to content

Commit 4237e62

Browse files
Terji PetersenTerji Petersen
Terji Petersen
authored and
Terji Petersen
committed
BUG/API: Indexes on empty frames/series should be RangeIndex, are Index[object]
1 parent 3c72d6f commit 4237e62

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+129
-109
lines changed

pandas/core/frame.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -632,8 +632,6 @@ def __init__(
632632
copy: bool | None = None,
633633
) -> None:
634634

635-
if data is None:
636-
data = {}
637635
if dtype is not None:
638636
dtype = self._validate_dtype(dtype)
639637

@@ -671,6 +669,12 @@ def __init__(
671669
else:
672670
copy = False
673671

672+
if data is None:
673+
index = index if index is not None else default_index(0)
674+
columns = columns if columns is not None else default_index(0)
675+
dtype = dtype if dtype is not None else pandas_dtype(object)
676+
data = []
677+
674678
if isinstance(data, (BlockManager, ArrayManager)):
675679
mgr = self._init_mgr(
676680
data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy
@@ -777,7 +781,7 @@ def __init__(
777781
mgr = dict_to_mgr(
778782
{},
779783
index,
780-
columns,
784+
columns if columns is not None else default_index(0),
781785
dtype=dtype,
782786
typ=manager,
783787
)
@@ -2310,7 +2314,7 @@ def maybe_reorder(
23102314
result_index = None
23112315
if len(arrays) == 0 and index is None and length == 0:
23122316
# no arrays, no explicit index and zero length: use an empty RangeIndex
2313-
result_index = Index([])
2317+
result_index = default_index(0)
23142318

23152319
arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns, length)
23162320
return arrays, arr_columns, result_index

pandas/core/internals/construction.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -580,9 +580,9 @@ def _extract_index(data) -> Index:
580580
"""
581581
Try to infer an Index from the passed data, raise ValueError on failure.
582582
"""
583-
index = None
583+
index: Index | None = None
584584
if len(data) == 0:
585-
index = Index([])
585+
index = default_index(0)
586586
else:
587587
raw_lengths = []
588588
indexes: list[list[Hashable] | Index] = []

pandas/core/reshape/merge.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1063,8 +1063,8 @@ def _get_join_info(
10631063
else:
10641064
join_index = default_index(len(left_indexer))
10651065

1066-
if len(join_index) == 0:
1067-
join_index = join_index.astype(object)
1066+
if len(join_index) == 0 and not isinstance(join_index, MultiIndex):
1067+
join_index = default_index(0).set_names(join_index.name)
10681068
return join_index, left_indexer, right_indexer
10691069

10701070
def _create_join_index(

pandas/core/series.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -385,11 +385,16 @@ def __init__(
385385
if index is not None:
386386
index = ensure_index(index)
387387

388-
if data is None:
389-
data = {}
390388
if dtype is not None:
391389
dtype = self._validate_dtype(dtype)
392390

391+
if data is None:
392+
index = index if index is not None else default_index(0)
393+
if len(index) or dtype is not None:
394+
data = na_value_for_dtype(pandas_dtype(dtype), compat=False)
395+
else:
396+
data = []
397+
393398
if isinstance(data, MultiIndex):
394399
raise NotImplementedError(
395400
"initializing a Series from a MultiIndex is not supported"

pandas/io/parsers/base_parser.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
from pandas.core.indexes.api import (
8181
Index,
8282
MultiIndex,
83+
default_index,
8384
ensure_index_from_sequences,
8485
)
8586
from pandas.core.series import Series
@@ -1085,8 +1086,9 @@ def _get_empty_meta(
10851086
#
10861087
# Both must be non-null to ensure a successful construction. Otherwise,
10871088
# we have to create a generic empty Index.
1089+
index: Index
10881090
if (index_col is None or index_col is False) or index_names is None:
1089-
index = Index([])
1091+
index = default_index(0)
10901092
else:
10911093
data = [Series([], dtype=dtype_dict[name]) for name in index_names]
10921094
index = ensure_index_from_sequences(data, names=index_names)

pandas/tests/apply/test_frame_apply.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -114,14 +114,14 @@ def test_apply_with_reduce_empty():
114114
result = empty_frame.apply(x.append, axis=1, result_type="expand")
115115
tm.assert_frame_equal(result, empty_frame)
116116
result = empty_frame.apply(x.append, axis=1, result_type="reduce")
117-
expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64)
117+
expected = Series([], dtype=np.float64)
118118
tm.assert_series_equal(result, expected)
119119

120120
empty_with_cols = DataFrame(columns=["a", "b", "c"])
121121
result = empty_with_cols.apply(x.append, axis=1, result_type="expand")
122122
tm.assert_frame_equal(result, empty_with_cols)
123123
result = empty_with_cols.apply(x.append, axis=1, result_type="reduce")
124-
expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64)
124+
expected = Series([], dtype=np.float64)
125125
tm.assert_series_equal(result, expected)
126126

127127
# Ensure that x.append hasn't been called
@@ -147,7 +147,7 @@ def test_nunique_empty():
147147
tm.assert_series_equal(result, expected)
148148

149149
result = df.T.nunique()
150-
expected = Series([], index=pd.Index([]), dtype=np.float64)
150+
expected = Series([], dtype=np.float64)
151151
tm.assert_series_equal(result, expected)
152152

153153

pandas/tests/apply/test_str.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from pandas import (
1010
DataFrame,
11-
Index,
1211
Series,
1312
)
1413
import pandas._testing as tm
@@ -149,8 +148,8 @@ def test_agg_cython_table_series(series, func, expected):
149148
tm.get_cython_table_params(
150149
Series(dtype=np.float64),
151150
[
152-
("cumprod", Series([], Index([]), dtype=np.float64)),
153-
("cumsum", Series([], Index([]), dtype=np.float64)),
151+
("cumprod", Series([], dtype=np.float64)),
152+
("cumsum", Series([], dtype=np.float64)),
154153
],
155154
),
156155
tm.get_cython_table_params(

pandas/tests/extension/base/constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ def test_construct_empty_dataframe(self, dtype):
119119
# GH 33623
120120
result = pd.DataFrame(columns=["a"], dtype=dtype)
121121
expected = pd.DataFrame(
122-
{"a": pd.array([], dtype=dtype)}, index=pd.Index([], dtype="object")
122+
{"a": pd.array([], dtype=dtype)}, index=pd.RangeIndex(0)
123123
)
124124
self.assert_frame_equal(result, expected)
125125

pandas/tests/extension/base/missing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def test_dropna_frame(self, data_missing):
5555

5656
# axis = 1
5757
result = df.dropna(axis="columns")
58-
expected = pd.DataFrame(index=[0, 1])
58+
expected = pd.DataFrame(index=pd.RangeIndex(2), columns=pd.Index([]))
5959
self.assert_frame_equal(result, expected)
6060

6161
# multiple

pandas/tests/frame/indexing/test_xs.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def test_xs_corner(self):
8484
# no columns but Index(dtype=object)
8585
df = DataFrame(index=["a", "b", "c"])
8686
result = df.xs("a")
87-
expected = Series([], name="a", index=Index([]), dtype=np.float64)
87+
expected = Series([], name="a", dtype=np.float64)
8888
tm.assert_series_equal(result, expected)
8989

9090
def test_xs_duplicates(self):

pandas/tests/frame/methods/test_count.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def test_count(self):
2828

2929
df = DataFrame()
3030
result = df.count()
31-
expected = Series(0, index=[])
31+
expected = Series(dtype="int64")
3232
tm.assert_series_equal(result, expected)
3333

3434
def test_count_objects(self, float_string_frame):

pandas/tests/frame/methods/test_get_numeric_data.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def test_get_numeric_data_preserve_dtype(self):
1717
# get the numeric data
1818
obj = DataFrame({"A": [1, "2", 3.0]})
1919
result = obj._get_numeric_data()
20-
expected = DataFrame(index=[0, 1, 2], dtype=object)
20+
expected = DataFrame(dtype=object, index=pd.RangeIndex(3), columns=[])
2121
tm.assert_frame_equal(result, expected)
2222

2323
def test_get_numeric_data(self):

pandas/tests/frame/methods/test_quantile.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ def test_quantile_datetime(self):
420420
tm.assert_series_equal(result, expected)
421421

422422
result = df[["a", "c"]].quantile([0.5], numeric_only=True)
423-
expected = DataFrame(index=[0.5])
423+
expected = DataFrame(index=[0.5], columns=[])
424424
tm.assert_frame_equal(result, expected)
425425

426426
@pytest.mark.parametrize(
@@ -451,7 +451,7 @@ def test_quantile_dt64_empty(self, dtype, interp_method):
451451
interpolation=interpolation,
452452
method=method,
453453
)
454-
expected = DataFrame(index=[0.5])
454+
expected = DataFrame(index=[0.5], columns=[])
455455
tm.assert_frame_equal(res, expected)
456456

457457
@pytest.mark.parametrize("invalid", [-1, 2, [0.5, -1], [0.5, 2]])

pandas/tests/frame/methods/test_rank.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ def test_rank_object_first(self, frame_or_series, na_option, ascending, expected
483483
"data,expected",
484484
[
485485
({"a": [1, 2, "a"], "b": [4, 5, 6]}, DataFrame({"b": [1.0, 2.0, 3.0]})),
486-
({"a": [1, 2, "a"]}, DataFrame(index=range(3))),
486+
({"a": [1, 2, "a"]}, DataFrame(index=range(3), columns=[])),
487487
],
488488
)
489489
def test_rank_mixed_axis_zero(self, data, expected):

pandas/tests/frame/methods/test_to_csv.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ def test_to_csv_dup_cols(self, nrows):
390390
def test_to_csv_empty(self):
391391
df = DataFrame(index=np.arange(10))
392392
result, expected = self._return_result_expected(df, 1000)
393-
tm.assert_frame_equal(result, expected, check_names=False)
393+
tm.assert_frame_equal(result, expected, check_column_type=False)
394394

395395
@pytest.mark.slow
396396
def test_to_csv_chunksize(self):

pandas/tests/frame/test_constructors.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -194,13 +194,11 @@ def test_series_with_name_not_matching_column(self):
194194
[
195195
lambda: DataFrame(),
196196
lambda: DataFrame(None),
197-
lambda: DataFrame({}),
198197
lambda: DataFrame(()),
199198
lambda: DataFrame([]),
200199
lambda: DataFrame(_ for _ in []),
201200
lambda: DataFrame(range(0)),
202201
lambda: DataFrame(data=None),
203-
lambda: DataFrame(data={}),
204202
lambda: DataFrame(data=()),
205203
lambda: DataFrame(data=[]),
206204
lambda: DataFrame(data=(_ for _ in [])),
@@ -214,6 +212,20 @@ def test_empty_constructor(self, constructor):
214212
assert len(result.columns) == 0
215213
tm.assert_frame_equal(result, expected)
216214

215+
@pytest.mark.parametrize(
216+
"constructor",
217+
[
218+
lambda: DataFrame({}),
219+
lambda: DataFrame(data={}),
220+
],
221+
)
222+
def test_empty_constructor_object_index(self, constructor):
223+
expected = DataFrame(columns=Index([]))
224+
result = constructor()
225+
assert len(result.index) == 0
226+
assert len(result.columns) == 0
227+
tm.assert_frame_equal(result, expected, check_index_type=True)
228+
217229
@pytest.mark.parametrize(
218230
"emptylike,expected_index,expected_columns",
219231
[
@@ -1392,7 +1404,7 @@ def test_constructor_generator(self):
13921404
def test_constructor_list_of_dicts(self):
13931405

13941406
result = DataFrame([{}])
1395-
expected = DataFrame(index=[0])
1407+
expected = DataFrame(index=RangeIndex(1), columns=[])
13961408
tm.assert_frame_equal(result, expected)
13971409

13981410
@pytest.mark.parametrize("dict_type", [dict, OrderedDict])
@@ -1753,7 +1765,7 @@ def test_constructor_empty_with_string_dtype(self):
17531765

17541766
def test_constructor_empty_with_string_extension(self, nullable_string_dtype):
17551767
# GH 34915
1756-
expected = DataFrame(index=[], columns=["c1"], dtype=nullable_string_dtype)
1768+
expected = DataFrame(columns=["c1"], dtype=nullable_string_dtype)
17571769
df = DataFrame(columns=["c1"], dtype=nullable_string_dtype)
17581770
tm.assert_frame_equal(df, expected)
17591771

pandas/tests/frame/test_reductions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1163,7 +1163,7 @@ def test_any_all_bool_only(self):
11631163
)
11641164

11651165
result = df.all(bool_only=True)
1166-
expected = Series(dtype=np.bool_)
1166+
expected = Series(dtype=np.bool_, index=[])
11671167
tm.assert_series_equal(result, expected)
11681168

11691169
df = DataFrame(

pandas/tests/frame/test_stack_unstack.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1251,7 +1251,8 @@ def test_stack_timezone_aware_values():
12511251
@pytest.mark.parametrize("dropna", [True, False])
12521252
def test_stack_empty_frame(dropna):
12531253
# GH 36113
1254-
expected = Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64)
1254+
levels = [np.array([], dtype=np.int64), np.array([], dtype=np.int64)]
1255+
expected = Series(dtype=np.float64, index=MultiIndex(levels=levels, codes=[[], []]))
12551256
result = DataFrame(dtype=np.float64).stack(dropna=dropna)
12561257
tm.assert_series_equal(result, expected)
12571258

pandas/tests/groupby/aggregate/test_aggregate.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -679,7 +679,7 @@ def test_no_args_raises(self):
679679

680680
# but we do allow this
681681
result = gr.agg([])
682-
expected = DataFrame()
682+
expected = DataFrame(columns=[])
683683
tm.assert_frame_equal(result, expected)
684684

685685
def test_series_named_agg_duplicates_no_raises(self):

pandas/tests/groupby/aggregate/test_cython.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,9 @@ def test_cython_agg_nothing_to_agg():
104104
with pytest.raises(TypeError, match="Could not convert"):
105105
frame[["b"]].groupby(frame["a"]).mean()
106106
result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True)
107-
expected = DataFrame([], index=frame["a"].sort_values().drop_duplicates())
107+
expected = DataFrame(
108+
[], index=frame["a"].sort_values().drop_duplicates(), columns=[]
109+
)
108110
tm.assert_frame_equal(result, expected)
109111

110112

pandas/tests/groupby/test_grouping.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,7 @@ def test_list_grouper_with_nat(self):
687687
[
688688
(
689689
"transform",
690-
Series(name=2, dtype=np.float64, index=Index([])),
690+
Series(name=2, dtype=np.float64),
691691
),
692692
(
693693
"agg",
@@ -873,7 +873,7 @@ def test_groupby_with_single_column(self):
873873
df = DataFrame({"a": list("abssbab")})
874874
tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]])
875875
# GH 13530
876-
exp = DataFrame(index=Index(["a", "b", "s"], name="a"))
876+
exp = DataFrame(index=Index(["a", "b", "s"], name="a"), columns=[])
877877
tm.assert_frame_equal(df.groupby("a").count(), exp)
878878
tm.assert_frame_equal(df.groupby("a").sum(), exp)
879879

pandas/tests/indexing/test_iloc.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ def check(result, expected):
246246
tm.assert_frame_equal(result, expected)
247247

248248
dfl = DataFrame(np.random.randn(5, 2), columns=list("AB"))
249-
check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index))
249+
check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index, columns=[]))
250250
check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
251251
check(dfl.iloc[4:6], dfl.iloc[[4]])
252252

pandas/tests/indexing/test_na_indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def test_series_mask_boolean(values, dtype, mask, indexer_class, frame):
3434
if frame:
3535
if len(values) == 0:
3636
# Otherwise obj is an empty DataFrame with shape (0, 1)
37-
obj = pd.DataFrame(dtype=dtype)
37+
obj = pd.DataFrame(dtype=dtype, index=index)
3838
else:
3939
obj = obj.to_frame()
4040

pandas/tests/indexing/test_partial.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,12 @@ def test_partial_set_empty_frame2(self):
100100

101101
tm.assert_frame_equal(df, expected)
102102

103-
df = DataFrame()
103+
df = DataFrame(index=Index([]))
104104
df["foo"] = Series(df.index)
105105

106106
tm.assert_frame_equal(df, expected)
107107

108-
df = DataFrame()
108+
df = DataFrame(index=Index([]))
109109
df["foo"] = df.index
110110

111111
tm.assert_frame_equal(df, expected)
@@ -135,7 +135,7 @@ def test_partial_set_empty_frame4(self):
135135

136136
def test_partial_set_empty_frame5(self):
137137
df = DataFrame()
138-
tm.assert_index_equal(df.columns, Index([], dtype=object))
138+
tm.assert_index_equal(df.columns, pd.RangeIndex(0))
139139
df2 = DataFrame()
140140
df2[1] = Series([1], index=["foo"])
141141
df.loc[:, 1] = Series([1], index=["foo"])
@@ -182,7 +182,7 @@ def test_partial_set_empty_frame_row(self):
182182
df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]})
183183
y = df[df.A > 5]
184184
result = y.reindex(columns=["A", "B", "C"])
185-
expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64"))
185+
expected = DataFrame(columns=["A", "B", "C"])
186186
expected["A"] = expected["A"].astype("int64")
187187
expected["B"] = expected["B"].astype("float64")
188188
expected["C"] = expected["C"].astype("float64")

pandas/tests/io/excel/test_readers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1622,7 +1622,7 @@ def test_read_datetime_multiindex(self, request, engine, read_ext):
16221622
pd.to_datetime("03/01/2020").to_pydatetime(),
16231623
],
16241624
)
1625-
expected = DataFrame([], columns=expected_column_index)
1625+
expected = DataFrame([], index=[], columns=expected_column_index)
16261626

16271627
tm.assert_frame_equal(expected, actual)
16281628

0 commit comments

Comments
 (0)