Skip to content

Commit 849016c

Browse files
authored
REF: Use default_index or preserve original Index type for empty-like results (#59035)
* Use more default_index for empty cases
* fix tests
* Update number
* Address typing
1 parent f9f12de commit 849016c

File tree

13 files changed

+35
-23
lines changed

13 files changed

+35
-23
lines changed

doc/source/whatsnew/v3.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -503,8 +503,8 @@ Timezones
503503

504504
Numeric
505505
^^^^^^^
506+
- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` and a list-like ``q`` produced an empty result (:issue:`59035`)
506507
- Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`)
507-
-
508508

509509
Conversion
510510
^^^^^^^^^^

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13078,7 +13078,7 @@ def quantile(
1307813078

1307913079
if len(data.columns) == 0:
1308013080
# GH#23925 _get_numeric_data may have dropped all columns
13081-
cols = Index([], name=self.columns.name)
13081+
cols = self.columns[:0]
1308213082

1308313083
dtype = np.float64
1308413084
if axis == 1:

pandas/core/generic.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,6 @@
158158
Index,
159159
MultiIndex,
160160
PeriodIndex,
161-
RangeIndex,
162161
default_index,
163162
ensure_index,
164163
)
@@ -1852,7 +1851,7 @@ def _drop_labels_or_levels(self, keys, axis: AxisInt = 0):
18521851
else:
18531852
# Drop the last level of Index by replacing with
18541853
# a RangeIndex
1855-
dropped.columns = RangeIndex(dropped.columns.size)
1854+
dropped.columns = default_index(dropped.columns.size)
18561855

18571856
# Handle dropping index labels
18581857
if labels_to_drop:

pandas/core/groupby/groupby.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@ class providing the base-class of operations.
128128
from pandas.core.indexes.api import (
129129
Index,
130130
MultiIndex,
131-
RangeIndex,
132131
default_index,
133132
)
134133
from pandas.core.internals.blocks import ensure_block_shape
@@ -1264,7 +1263,7 @@ def _set_result_index_ordered(
12641263
if self._grouper.has_dropped_na:
12651264
# Add back in any missing rows due to dropna - index here is integral
12661265
# with values referring to the row of the input so can use RangeIndex
1267-
result = result.reindex(RangeIndex(len(index)), axis=0)
1266+
result = result.reindex(default_index(len(index)), axis=0)
12681267
result = result.set_axis(index, axis=0)
12691268

12701269
return result
@@ -1334,7 +1333,7 @@ def _wrap_aggregated_output(
13341333
# enforced in __init__
13351334
result = self._insert_inaxis_grouper(result, qs=qs)
13361335
result = result._consolidate()
1337-
result.index = RangeIndex(len(result))
1336+
result.index = default_index(len(result))
13381337

13391338
else:
13401339
index = self._grouper.result_index

pandas/core/groupby/grouper.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from pandas.core.indexes.api import (
3535
Index,
3636
MultiIndex,
37+
default_index,
3738
)
3839
from pandas.core.series import Series
3940

@@ -901,7 +902,7 @@ def is_in_obj(gpr) -> bool:
901902
if len(groupings) == 0 and len(obj):
902903
raise ValueError("No group keys passed!")
903904
if len(groupings) == 0:
904-
groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp)))
905+
groupings.append(Grouping(default_index(0), np.array([], dtype=np.intp)))
905906

906907
# create the internals grouper
907908
grouper = ops.BaseGrouper(group_axis, groupings, sort=sort, dropna=dropna)

pandas/core/indexes/api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def _get_combined_index(
130130
# TODO: handle index names!
131131
indexes = _get_distinct_objs(indexes)
132132
if len(indexes) == 0:
133-
index = Index([])
133+
index: Index = default_index(0)
134134
elif len(indexes) == 1:
135135
index = indexes[0]
136136
elif intersect:

pandas/core/internals/managers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ def blklocs(self) -> npt.NDArray[np.intp]:
249249
def make_empty(self, axes=None) -> Self:
250250
"""return an empty BlockManager with the items axis of len 0"""
251251
if axes is None:
252-
axes = [Index([])] + self.axes[1:]
252+
axes = [default_index(0)] + self.axes[1:]
253253

254254
# preserve dtype if possible
255255
if self.ndim == 1:

pandas/core/methods/selectn.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
)
3030
from pandas.core.dtypes.dtypes import BaseMaskedDtype
3131

32+
from pandas.core.indexes.api import default_index
33+
3234
if TYPE_CHECKING:
3335
from pandas._typing import (
3436
DtypeObj,
@@ -38,6 +40,7 @@
3840

3941
from pandas import (
4042
DataFrame,
43+
Index,
4144
Series,
4245
)
4346
else:
@@ -199,8 +202,6 @@ def __init__(self, obj: DataFrame, n: int, keep: str, columns: IndexLabel) -> No
199202
self.columns = columns
200203

201204
def compute(self, method: str) -> DataFrame:
202-
from pandas.core.api import Index
203-
204205
n = self.n
205206
frame = self.obj
206207
columns = self.columns
@@ -227,7 +228,7 @@ def get_indexer(current_indexer: Index, other_indexer: Index) -> Index:
227228
original_index = frame.index
228229
cur_frame = frame = frame.reset_index(drop=True)
229230
cur_n = n
230-
indexer = Index([], dtype=np.int64)
231+
indexer: Index = default_index(0)
231232

232233
for i, column in enumerate(columns):
233234
# For each column we apply method to cur_frame[column].

pandas/core/reshape/reshape.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
from pandas.core.indexes.api import (
4343
Index,
4444
MultiIndex,
45-
RangeIndex,
45+
default_index,
4646
)
4747
from pandas.core.reshape.concat import concat
4848
from pandas.core.series import Series
@@ -1047,7 +1047,7 @@ def stack_reshape(
10471047
if data.ndim == 1:
10481048
data.name = 0
10491049
else:
1050-
data.columns = RangeIndex(len(data.columns))
1050+
data.columns = default_index(len(data.columns))
10511051
buf.append(data)
10521052

10531053
if len(buf) > 0 and not frame.empty:

pandas/tests/frame/methods/test_quantile.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -710,14 +710,14 @@ def test_quantile_empty_no_columns(self, interp_method):
710710
result = df.quantile(
711711
0.5, numeric_only=True, interpolation=interpolation, method=method
712712
)
713-
expected = Series([], index=[], name=0.5, dtype=np.float64)
713+
expected = Series([], name=0.5, dtype=np.float64)
714714
expected.index.name = "captain tightpants"
715715
tm.assert_series_equal(result, expected)
716716

717717
result = df.quantile(
718718
[0.5], numeric_only=True, interpolation=interpolation, method=method
719719
)
720-
expected = DataFrame([], index=[0.5], columns=[])
720+
expected = DataFrame([], index=[0.5])
721721
expected.columns.name = "captain tightpants"
722722
tm.assert_frame_equal(result, expected)
723723

@@ -926,3 +926,12 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis):
926926
expected_data, name=0.5, index=Index(expected_index), dtype=np.float64
927927
)
928928
tm.assert_series_equal(result, expected)
929+
930+
931+
def test_multi_quantile_numeric_only_retains_columns():
932+
df = DataFrame(list("abc"))
933+
result = df.quantile([0.5, 0.7], numeric_only=True)
934+
expected = DataFrame(index=[0.5, 0.7])
935+
tm.assert_frame_equal(
936+
result, expected, check_index_type=True, check_column_type=True
937+
)

pandas/tests/generic/test_generic.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,19 @@ def test_get_numeric_data(self, frame_or_series):
9393
if isinstance(o, DataFrame):
9494
# preserve columns dtype
9595
expected.columns = o.columns[:0]
96-
# https://github.com/pandas-dev/pandas/issues/50862
97-
tm.assert_equal(result.reset_index(drop=True), expected)
96+
tm.assert_equal(result, expected)
9897

9998
# get the bool data
10099
arr = np.array([True, True, False, True])
101100
o = construct(frame_or_series, n, value=arr, **kwargs)
102101
result = o._get_numeric_data()
103102
tm.assert_equal(result, o)
104103

104+
def test_get_bool_data_empty_preserve_index(self):
105+
expected = Series([], dtype="bool")
106+
result = expected._get_bool_data()
107+
tm.assert_series_equal(result, expected, check_index_type=True)
108+
105109
def test_nonzero(self, frame_or_series):
106110
# GH 4633
107111
# look at the boolean/nonzero behavior for objects

pandas/tests/indexes/test_base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1558,7 +1558,7 @@ def test_ensure_index_uint64(self):
15581558

15591559
def test_get_combined_index(self):
15601560
result = _get_combined_index([])
1561-
expected = Index([])
1561+
expected = RangeIndex(0)
15621562
tm.assert_index_equal(result, expected)
15631563

15641564

pandas/tests/series/methods/test_get_numeric_data.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from pandas import (
2-
Index,
32
Series,
43
date_range,
54
)
@@ -19,7 +18,7 @@ def test_get_numeric_data_preserve_dtype(self):
1918

2019
obj = Series([1, "2", 3.0])
2120
result = obj._get_numeric_data()
22-
expected = Series([], dtype=object, index=Index([], dtype=object))
21+
expected = Series([], dtype=object)
2322
tm.assert_series_equal(result, expected)
2423

2524
obj = Series([True, False, True])
@@ -28,5 +27,5 @@ def test_get_numeric_data_preserve_dtype(self):
2827

2928
obj = Series(date_range("20130101", periods=3))
3029
result = obj._get_numeric_data()
31-
expected = Series([], dtype="M8[ns]", index=Index([], dtype=object))
30+
expected = Series([], dtype="M8[ns]")
3231
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)