Skip to content

REF: Use default_index or preserve original Index type for empty-like results #59035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -503,8 +503,8 @@ Timezones

Numeric
^^^^^^^
- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` and a list-like ``q`` produced an empty result (:issue:`59035`)
- Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`)
-

Conversion
^^^^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -13078,7 +13078,7 @@ def quantile(

if len(data.columns) == 0:
# GH#23925 _get_numeric_data may have dropped all columns
cols = Index([], name=self.columns.name)
cols = self.columns[:0]

dtype = np.float64
if axis == 1:
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,6 @@
Index,
MultiIndex,
PeriodIndex,
RangeIndex,
default_index,
ensure_index,
)
Expand Down Expand Up @@ -1852,7 +1851,7 @@ def _drop_labels_or_levels(self, keys, axis: AxisInt = 0):
else:
# Drop the last level of Index by replacing with
# a RangeIndex
dropped.columns = RangeIndex(dropped.columns.size)
dropped.columns = default_index(dropped.columns.size)

# Handle dropping index labels
if labels_to_drop:
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ class providing the base-class of operations.
from pandas.core.indexes.api import (
Index,
MultiIndex,
RangeIndex,
default_index,
)
from pandas.core.internals.blocks import ensure_block_shape
Expand Down Expand Up @@ -1264,7 +1263,7 @@ def _set_result_index_ordered(
if self._grouper.has_dropped_na:
# Add back in any missing rows due to dropna - index here is integral
# with values referring to the row of the input so can use RangeIndex
result = result.reindex(RangeIndex(len(index)), axis=0)
result = result.reindex(default_index(len(index)), axis=0)
result = result.set_axis(index, axis=0)

return result
Expand Down Expand Up @@ -1334,7 +1333,7 @@ def _wrap_aggregated_output(
# enforced in __init__
result = self._insert_inaxis_grouper(result, qs=qs)
result = result._consolidate()
result.index = RangeIndex(len(result))
result.index = default_index(len(result))

else:
index = self._grouper.result_index
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from pandas.core.indexes.api import (
Index,
MultiIndex,
default_index,
)
from pandas.core.series import Series

Expand Down Expand Up @@ -901,7 +902,7 @@ def is_in_obj(gpr) -> bool:
if len(groupings) == 0 and len(obj):
raise ValueError("No group keys passed!")
if len(groupings) == 0:
groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp)))
groupings.append(Grouping(default_index(0), np.array([], dtype=np.intp)))

# create the internals grouper
grouper = ops.BaseGrouper(group_axis, groupings, sort=sort, dropna=dropna)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def _get_combined_index(
# TODO: handle index names!
indexes = _get_distinct_objs(indexes)
if len(indexes) == 0:
index = Index([])
index: Index = default_index(0)
elif len(indexes) == 1:
index = indexes[0]
elif intersect:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def blklocs(self) -> npt.NDArray[np.intp]:
def make_empty(self, axes=None) -> Self:
"""return an empty BlockManager with the items axis of len 0"""
if axes is None:
axes = [Index([])] + self.axes[1:]
axes = [default_index(0)] + self.axes[1:]

# preserve dtype if possible
if self.ndim == 1:
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/methods/selectn.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
)
from pandas.core.dtypes.dtypes import BaseMaskedDtype

from pandas.core.indexes.api import default_index

if TYPE_CHECKING:
from pandas._typing import (
DtypeObj,
Expand All @@ -38,6 +40,7 @@

from pandas import (
DataFrame,
Index,
Series,
)
else:
Expand Down Expand Up @@ -199,8 +202,6 @@ def __init__(self, obj: DataFrame, n: int, keep: str, columns: IndexLabel) -> No
self.columns = columns

def compute(self, method: str) -> DataFrame:
from pandas.core.api import Index

n = self.n
frame = self.obj
columns = self.columns
Expand All @@ -227,7 +228,7 @@ def get_indexer(current_indexer: Index, other_indexer: Index) -> Index:
original_index = frame.index
cur_frame = frame = frame.reset_index(drop=True)
cur_n = n
indexer = Index([], dtype=np.int64)
indexer: Index = default_index(0)

for i, column in enumerate(columns):
# For each column we apply method to cur_frame[column].
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from pandas.core.indexes.api import (
Index,
MultiIndex,
RangeIndex,
default_index,
)
from pandas.core.reshape.concat import concat
from pandas.core.series import Series
Expand Down Expand Up @@ -1047,7 +1047,7 @@ def stack_reshape(
if data.ndim == 1:
data.name = 0
else:
data.columns = RangeIndex(len(data.columns))
data.columns = default_index(len(data.columns))
buf.append(data)

if len(buf) > 0 and not frame.empty:
Expand Down
13 changes: 11 additions & 2 deletions pandas/tests/frame/methods/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,14 +710,14 @@ def test_quantile_empty_no_columns(self, interp_method):
result = df.quantile(
0.5, numeric_only=True, interpolation=interpolation, method=method
)
expected = Series([], index=[], name=0.5, dtype=np.float64)
expected = Series([], name=0.5, dtype=np.float64)
expected.index.name = "captain tightpants"
tm.assert_series_equal(result, expected)

result = df.quantile(
[0.5], numeric_only=True, interpolation=interpolation, method=method
)
expected = DataFrame([], index=[0.5], columns=[])
expected = DataFrame([], index=[0.5])
expected.columns.name = "captain tightpants"
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -926,3 +926,12 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis):
expected_data, name=0.5, index=Index(expected_index), dtype=np.float64
)
tm.assert_series_equal(result, expected)


def test_multi_quantile_numeric_only_retains_columns():
# GH#59035
# A frame holding a single column of strings has no numeric columns, so
# a list-like ``q`` with ``numeric_only=True`` yields a result with no columns.
df = DataFrame(list("abc"))
result = df.quantile([0.5, 0.7], numeric_only=True)
# Expected: one row per requested quantile and the columns of a frame
# constructed without any column argument.
expected = DataFrame(index=[0.5, 0.7])
# Compare index and column classes/dtypes explicitly, not only their values.
tm.assert_frame_equal(
result, expected, check_index_type=True, check_column_type=True
)
8 changes: 6 additions & 2 deletions pandas/tests/generic/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,19 @@ def test_get_numeric_data(self, frame_or_series):
if isinstance(o, DataFrame):
# preserve columns dtype
expected.columns = o.columns[:0]
# https://github.com/pandas-dev/pandas/issues/50862
tm.assert_equal(result.reset_index(drop=True), expected)
tm.assert_equal(result, expected)

# get the bool data
arr = np.array([True, True, False, True])
o = construct(frame_or_series, n, value=arr, **kwargs)
result = o._get_numeric_data()
tm.assert_equal(result, o)

def test_get_bool_data_empty_preserve_index(self):
# An empty bool Series passed through ``_get_bool_data`` should compare
# equal to itself, including the type of its index.
expected = Series([], dtype="bool")
result = expected._get_bool_data()
# check_index_type=True makes the comparison strict about the index type.
tm.assert_series_equal(result, expected, check_index_type=True)

def test_nonzero(self, frame_or_series):
# GH 4633
# look at the boolean/nonzero behavior for objects
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1558,7 +1558,7 @@ def test_ensure_index_uint64(self):

def test_get_combined_index(self):
result = _get_combined_index([])
expected = Index([])
expected = RangeIndex(0)
tm.assert_index_equal(result, expected)


Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/series/methods/test_get_numeric_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from pandas import (
Index,
Series,
date_range,
)
Expand All @@ -19,7 +18,7 @@ def test_get_numeric_data_preserve_dtype(self):

obj = Series([1, "2", 3.0])
result = obj._get_numeric_data()
expected = Series([], dtype=object, index=Index([], dtype=object))
expected = Series([], dtype=object)
tm.assert_series_equal(result, expected)

obj = Series([True, False, True])
Expand All @@ -28,5 +27,5 @@ def test_get_numeric_data_preserve_dtype(self):

obj = Series(date_range("20130101", periods=3))
result = obj._get_numeric_data()
expected = Series([], dtype="M8[ns]", index=Index([], dtype=object))
expected = Series([], dtype="M8[ns]")
tm.assert_series_equal(result, expected)
Loading