From 3419837246a422b8b1547412edd7cce9575070ac Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 16 Jun 2024 18:27:35 -0700 Subject: [PATCH 1/4] Use more default_index for empty cases --- pandas/core/frame.py | 2 +- pandas/core/generic.py | 3 +-- pandas/core/groupby/groupby.py | 5 ++--- pandas/core/groupby/grouper.py | 3 ++- pandas/core/indexes/api.py | 2 +- pandas/core/internals/managers.py | 2 +- pandas/core/methods/selectn.py | 7 +++++-- pandas/core/reshape/reshape.py | 4 ++-- pandas/tests/frame/methods/test_quantile.py | 9 +++++++++ 9 files changed, 24 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a6c0e1e372530..0aeda77233125 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -13078,7 +13078,7 @@ def quantile( if len(data.columns) == 0: # GH#23925 _get_numeric_data may have dropped all columns - cols = Index([], name=self.columns.name) + cols = self.columns[:0] dtype = np.float64 if axis == 1: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 599b3d5578fca..93068c665a880 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -158,7 +158,6 @@ Index, MultiIndex, PeriodIndex, - RangeIndex, default_index, ensure_index, ) @@ -1852,7 +1851,7 @@ def _drop_labels_or_levels(self, keys, axis: AxisInt = 0): else: # Drop the last level of Index by replacing with # a RangeIndex - dropped.columns = RangeIndex(dropped.columns.size) + dropped.columns = default_index(dropped.columns.size) # Handle dropping index labels if labels_to_drop: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1b58317c08736..4327d31ca6903 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -128,7 +128,6 @@ class providing the base-class of operations. from pandas.core.indexes.api import ( Index, MultiIndex, - RangeIndex, default_index, ) from pandas.core.internals.blocks import ensure_block_shape @@ -1264,7 +1263,7 @@ def _set_result_index_ordered( if self._grouper.has_dropped_na: # Add back in any missing rows due to dropna - index here is integral # with values referring to the row of the input so can use RangeIndex - result = result.reindex(RangeIndex(len(index)), axis=0) + result = result.reindex(default_index(len(index)), axis=0) result = result.set_axis(index, axis=0) return result @@ -1334,7 +1333,7 @@ def _wrap_aggregated_output( # enforced in __init__ result = self._insert_inaxis_grouper(result, qs=qs) result = result._consolidate() - result.index = RangeIndex(len(result)) + result.index = default_index(len(result)) else: index = self._grouper.result_index diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index e75a5b9089f5f..5f680de77649f 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -34,6 +34,7 @@ from pandas.core.indexes.api import ( Index, MultiIndex, + default_index, ) from pandas.core.series import Series @@ -901,7 +902,7 @@ def is_in_obj(gpr) -> bool: if len(groupings) == 0 and len(obj): raise ValueError("No group keys passed!") if len(groupings) == 0: - groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp))) + groupings.append(Grouping(default_index(0), np.array([], dtype=np.intp))) # create the internals grouper grouper = ops.BaseGrouper(group_axis, groupings, sort=sort, dropna=dropna) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 83e8df5072b92..628790dbd8c6d 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -130,7 +130,7 @@ def _get_combined_index( # TODO: handle index names! indexes = _get_distinct_objs(indexes) if len(indexes) == 0: - index = Index([]) + index = default_index(0) elif len(indexes) == 1: index = indexes[0] elif intersect: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 64109f5c1655c..79cba9275a119 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -249,7 +249,7 @@ def blklocs(self) -> npt.NDArray[np.intp]: def make_empty(self, axes=None) -> Self: """return an empty BlockManager with the items axis of len 0""" if axes is None: - axes = [Index([])] + self.axes[1:] + axes = [default_index(0)] + self.axes[1:] # preserve dtype if possible if self.ndim == 1: diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py index 283acaca2c117..4cde038cfa052 100644 --- a/pandas/core/methods/selectn.py +++ b/pandas/core/methods/selectn.py @@ -199,7 +199,10 @@ def __init__(self, obj: DataFrame, n: int, keep: str, columns: IndexLabel) -> No self.columns = columns def compute(self, method: str) -> DataFrame: - from pandas.core.api import Index + from pandas.core.api import ( + Index, + default_index, + ) n = self.n frame = self.obj @@ -227,7 +230,7 @@ def get_indexer(current_indexer: Index, other_indexer: Index) -> Index: original_index = frame.index cur_frame = frame = frame.reset_index(drop=True) cur_n = n - indexer = Index([], dtype=np.int64) + indexer = default_index(0) for i, column in enumerate(columns): # For each column we apply method to cur_frame[column]. diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index a8efae8da82c8..664ac57fcc823 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -42,7 +42,7 @@ from pandas.core.indexes.api import ( Index, MultiIndex, - RangeIndex, + default_index, ) from pandas.core.reshape.concat import concat from pandas.core.series import Series @@ -1047,7 +1047,7 @@ def stack_reshape( if data.ndim == 1: data.name = 0 else: - data.columns = RangeIndex(len(data.columns)) + data.columns = default_index(len(data.columns)) buf.append(data) if len(buf) > 0 and not frame.empty: diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 32ae4c0ff2f50..4a9fce665a319 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -926,3 +926,12 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis): expected_data, name=0.5, index=Index(expected_index), dtype=np.float64 ) tm.assert_series_equal(result, expected) + + +def test_multi_quantile_numeric_only_retains_columns(): + df = DataFrame(list("abc")) + result = df.quantile([0.5, 0.7], numeric_only=True) + expected = DataFrame(index=[0.5, 0.7]) + tm.assert_frame_equal( + result, expected, check_index_type=True, check_column_type=True + ) From eff7c466af5c4ff464e8f1d6edc99367b83ec7a3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 17 Jun 2024 11:25:16 -0700 Subject: [PATCH 2/4] fix tests --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/methods/selectn.py | 8 +++----- pandas/tests/frame/methods/test_quantile.py | 4 ++-- pandas/tests/generic/test_generic.py | 8 ++++++-- pandas/tests/indexes/test_base.py | 2 +- pandas/tests/series/methods/test_get_numeric_data.py | 5 ++--- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 80e5e89b79690..cf9543ee7e465 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -504,7 +504,7 @@ Timezones Numeric ^^^^^^^ - Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`) -- +- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`?``) Conversion ^^^^^^^^^^ diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py index 4cde038cfa052..c766e84286afa 100644 --- a/pandas/core/methods/selectn.py +++ b/pandas/core/methods/selectn.py @@ -29,6 +29,8 @@ ) from pandas.core.dtypes.dtypes import BaseMaskedDtype +from pandas.core.indexes.api import default_index + if TYPE_CHECKING: from pandas._typing import ( DtypeObj, @@ -38,6 +40,7 @@ from pandas import ( DataFrame, + Index, Series, ) else: @@ -199,11 +202,6 @@ def __init__(self, obj: DataFrame, n: int, keep: str, columns: IndexLabel) -> No self.columns = columns def compute(self, method: str) -> DataFrame: - from pandas.core.api import ( - Index, - default_index, - ) - n = self.n frame = self.obj columns = self.columns diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 4a9fce665a319..f35b77da0b547 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -710,14 +710,14 @@ def test_quantile_empty_no_columns(self, interp_method): result = df.quantile( 0.5, numeric_only=True, interpolation=interpolation, method=method ) - expected = Series([], index=[], name=0.5, dtype=np.float64) + expected = Series([], name=0.5, dtype=np.float64) expected.index.name = "captain tightpants" tm.assert_series_equal(result, expected) result = df.quantile( [0.5], numeric_only=True, interpolation=interpolation, method=method ) - expected = DataFrame([], index=[0.5], columns=[]) + expected = DataFrame([], index=[0.5]) expected.columns.name = "captain tightpants" tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 0b607d91baf65..b591b1b1092d4 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -93,8 +93,7 @@ def test_get_numeric_data(self, frame_or_series): if isinstance(o, DataFrame): # preserve columns dtype expected.columns = o.columns[:0] - # https://github.com/pandas-dev/pandas/issues/50862 - tm.assert_equal(result.reset_index(drop=True), expected) + tm.assert_equal(result, expected) # get the bool data arr = np.array([True, True, False, True]) @@ -102,6 +101,11 @@ def test_get_numeric_data(self, frame_or_series): result = o._get_numeric_data() tm.assert_equal(result, o) + def test_get_bool_data_empty_preserve_index(self): + expected = Series([], dtype="bool") + result = expected._get_bool_data() + tm.assert_series_equal(result, expected, check_index_type=True) + def test_nonzero(self, frame_or_series): # GH 4633 # look at the boolean/nonzero behavior for objects diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index e701a49ea93ad..16908fbb4fecc 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1558,7 +1558,7 @@ def test_ensure_index_uint64(self): def test_get_combined_index(self): result = _get_combined_index([]) - expected = Index([]) + expected = RangeIndex(0) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/series/methods/test_get_numeric_data.py b/pandas/tests/series/methods/test_get_numeric_data.py index f25583904377a..4a11d7905f506 100644 --- a/pandas/tests/series/methods/test_get_numeric_data.py +++ b/pandas/tests/series/methods/test_get_numeric_data.py @@ -1,5 +1,4 @@ from pandas import ( - Index, Series, date_range, ) @@ -19,7 +18,7 @@ def test_get_numeric_data_preserve_dtype(self): obj = Series([1, "2", 3.0]) result = obj._get_numeric_data() - expected = Series([], dtype=object, index=Index([], dtype=object)) + expected = Series([], dtype=object) tm.assert_series_equal(result, expected) obj = Series([True, False, True]) @@ -28,5 +27,5 @@ def test_get_numeric_data_preserve_dtype(self): obj = Series(date_range("20130101", periods=3)) result = obj._get_numeric_data() - expected = Series([], dtype="M8[ns]", index=Index([], dtype=object)) + expected = Series([], dtype="M8[ns]") tm.assert_series_equal(result, expected) From 9fc6589a98cfe785794cd6592539f7e2a03e9a1d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 17 Jun 2024 11:28:49 -0700 Subject: [PATCH 3/4] Update number --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index cf9543ee7e465..eb6679eb61dbd 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -503,8 +503,8 @@ Timezones Numeric ^^^^^^^ +- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`) - Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`) -- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`?``) Conversion ^^^^^^^^^^ From 7319e6a690bfb0e12fd05af930107dabb28663cd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 17 Jun 2024 12:15:02 -0700 Subject: [PATCH 4/4] Address typing --- pandas/core/indexes/api.py | 2 +- pandas/core/methods/selectn.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 628790dbd8c6d..5144e647e73b4 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -130,7 +130,7 @@ def _get_combined_index( # TODO: handle index names! indexes = _get_distinct_objs(indexes) if len(indexes) == 0: - index = default_index(0) + index: Index = default_index(0) elif len(indexes) == 1: index = indexes[0] elif intersect: diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py index c766e84286afa..02e7445f1d275 100644 --- a/pandas/core/methods/selectn.py +++ b/pandas/core/methods/selectn.py @@ -228,7 +228,7 @@ def get_indexer(current_indexer: Index, other_indexer: Index) -> Index: original_index = frame.index cur_frame = frame = frame.reset_index(drop=True) cur_n = n - indexer = default_index(0) + indexer: Index = default_index(0) for i, column in enumerate(columns): # For each column we apply method to cur_frame[column].