Skip to content

Commit 1e98a41

Browse files
committed
Merge remote-tracking branch 'upstream/main' into ref/is_range_indexer/step
2 parents c9339d5 + f5d754d commit 1e98a41

31 files changed

+209
-188
lines changed

doc/source/whatsnew/v3.0.0.rst

+6-4
Original file line numberDiff line numberDiff line change
@@ -272,11 +272,11 @@ Performance improvements
272272
- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
273273
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
274274
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
275-
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
275+
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
276276
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
277-
- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`)
278-
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`)
279-
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`)
277+
- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`)
278+
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
279+
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
280280
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
281281
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
282282

@@ -289,6 +289,7 @@ Bug fixes
289289
- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
290290
- Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
291291
- Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
292+
- Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
292293
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
293294
- Fixed bug in :meth:`Series.rank` that didn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
294295

@@ -393,6 +394,7 @@ Other
393394
^^^^^
394395
- Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
395396
- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
397+
- Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
396398
- Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
397399
- Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
398400
- Bug in the DataFrame Interchange Protocol implementation that was returning incorrect results for the dtype associated with data buffers, for string and datetime columns (:issue:`54781`)

environment.yml

+1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ dependencies:
6262
# downstream packages
6363
- dask-core
6464
- seaborn-base
65+
- dask-expr
6566

6667
# local testing dependencies
6768
- moto

pandas/core/algorithms.py

+4
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,10 @@ def unique_with_mask(values, mask: npt.NDArray[np.bool_] | None = None):
439439
# Dispatch to extension dtype's unique.
440440
return values.unique()
441441

442+
if isinstance(values, ABCIndex):
443+
# Dispatch to Index's unique.
444+
return values.unique()
445+
442446
original = values
443447
hashtable, values = _get_hashtable_algo(values)
444448

pandas/core/groupby/generic.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1642,8 +1642,11 @@ def _wrap_applied_output(
16421642
first_not_none = next(com.not_none(*values), None)
16431643

16441644
if first_not_none is None:
1645-
# GH9684 - All values are None, return an empty frame.
1646-
return self.obj._constructor()
1645+
# GH9684 - All values are None, return an empty frame
1646+
# GH57775 - Ensure that columns and dtypes from original frame are kept.
1647+
result = self.obj._constructor(columns=data.columns)
1648+
result = result.astype(data.dtypes)
1649+
return result
16471650
elif isinstance(first_not_none, DataFrame):
16481651
return self._concat_objects(
16491652
values,

pandas/core/groupby/groupby.py

+8
Original file line numberDiff line numberDiff line change
@@ -1636,6 +1636,14 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT:
16361636
a 5
16371637
b 2
16381638
dtype: int64
1639+
1640+
Example 4: The function passed to ``apply`` returns ``None`` for one of the
1641+
groups. This group is filtered from the result:
1642+
1643+
>>> g1.apply(lambda x: None if x.iloc[0, 0] == 3 else x, include_groups=False)
1644+
B C
1645+
0 1 4
1646+
1 2 6
16391647
"""
16401648
if isinstance(func, str):
16411649
if hasattr(self, func):

pandas/core/indexes/base.py

-1
Original file line numberDiff line numberDiff line change
@@ -4235,7 +4235,6 @@ def join(
42354235

42364236
return self._join_via_get_indexer(other, how, sort)
42374237

4238-
@final
42394238
def _join_empty(
42404239
self, other: Index, how: JoinHow, sort: bool
42414240
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:

pandas/core/indexes/multi.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2063,7 +2063,7 @@ def remove_unused_levels(self) -> MultiIndex:
20632063
20642064
>>> mi2 = mi[2:].remove_unused_levels()
20652065
>>> mi2.levels
2066-
(Index([1], dtype='int64'), Index(['a', 'b'], dtype='object'))
2066+
(RangeIndex(start=1, stop=2, step=1), Index(['a', 'b'], dtype='object'))
20672067
"""
20682068
new_levels = []
20692069
new_codes = []

pandas/core/indexes/range.py

+27-9
Original file line numberDiff line numberDiff line change
@@ -472,9 +472,15 @@ def _shallow_copy(self, values, name: Hashable = no_default):
472472

473473
if values.dtype.kind == "f":
474474
return Index(values, name=name, dtype=np.float64)
475-
if values.dtype.kind == "i" and values.ndim == 1 and len(values) > 1:
475+
if values.dtype.kind == "i" and values.ndim == 1:
476476
# GH 46675 & 43885: If values is equally spaced, return a
477477
# more memory-compact RangeIndex instead of Index with 64-bit dtype
478+
if len(values) == 0:
479+
return type(self)._simple_new(_empty_range, name=name)
480+
elif len(values) == 1:
481+
start = values[0]
482+
new_range = range(start, start + self.step, self.step)
483+
return type(self)._simple_new(new_range, name=name)
478484
diff = values[1] - values[0]
479485
if not missing.isna(diff) and lib.is_range(values, diff):
480486
new_range = range(values[0], values[-1] + diff, diff)
@@ -889,12 +895,19 @@ def symmetric_difference(
889895
result = result.rename(result_name)
890896
return result
891897

898+
def _join_empty(
899+
self, other: Index, how: JoinHow, sort: bool
900+
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
901+
if other.dtype.kind == "i":
902+
other = self._shallow_copy(other._values, name=other.name)
903+
return super()._join_empty(other, how=how, sort=sort)
904+
892905
def _join_monotonic(
893906
self, other: Index, how: JoinHow = "left"
894907
) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
895908
# This currently only gets called for the monotonic increasing case
896909
if not isinstance(other, type(self)):
897-
maybe_ri = self._shallow_copy(other._values)
910+
maybe_ri = self._shallow_copy(other._values, name=other.name)
898911
if not isinstance(maybe_ri, type(self)):
899912
return super()._join_monotonic(other, how=how)
900913
other = maybe_ri
@@ -1070,6 +1083,8 @@ def __getitem__(self, key):
10701083
"""
10711084
Conserve RangeIndex type for scalar and slice keys.
10721085
"""
1086+
if key is Ellipsis:
1087+
key = slice(None)
10731088
if isinstance(key, slice):
10741089
return self._getitem_slice(key)
10751090
elif is_integer(key):
@@ -1089,17 +1104,20 @@ def __getitem__(self, key):
10891104
)
10901105
elif com.is_bool_indexer(key):
10911106
if isinstance(getattr(key, "dtype", None), ExtensionDtype):
1092-
np_key = key.to_numpy(dtype=bool, na_value=False)
1107+
key = key.to_numpy(dtype=bool, na_value=False)
10931108
else:
1094-
np_key = np.asarray(key, dtype=bool)
1095-
check_array_indexer(self._range, np_key) # type: ignore[arg-type]
1109+
key = np.asarray(key, dtype=bool)
1110+
check_array_indexer(self._range, key) # type: ignore[arg-type]
10961111
# Short circuit potential _shallow_copy check
1097-
if np_key.all():
1112+
if key.all():
10981113
return self._simple_new(self._range, name=self.name)
1099-
elif not np_key.any():
1114+
elif not key.any():
11001115
return self._simple_new(_empty_range, name=self.name)
1101-
return self.take(np.flatnonzero(np_key))
1102-
return super().__getitem__(key)
1116+
key = np.flatnonzero(key)
1117+
try:
1118+
return self.take(key)
1119+
except (TypeError, ValueError):
1120+
return super().__getitem__(key)
11031121

11041122
def _getitem_slice(self, slobj: slice) -> Self:
11051123
"""

pandas/core/internals/__init__.py

+2-12
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@ def __getattr__(name: str):
3535
return create_block_manager_from_blocks
3636

3737
if name in [
38-
"NumericBlock",
39-
"ObjectBlock",
4038
"Block",
4139
"ExtensionBlock",
4240
"DatetimeTZBlock",
@@ -49,25 +47,17 @@ def __getattr__(name: str):
4947
# on hard-coding stacklevel
5048
stacklevel=2,
5149
)
52-
if name == "NumericBlock":
53-
from pandas.core.internals.blocks import NumericBlock
54-
55-
return NumericBlock
56-
elif name == "DatetimeTZBlock":
50+
if name == "DatetimeTZBlock":
5751
from pandas.core.internals.blocks import DatetimeTZBlock
5852

5953
return DatetimeTZBlock
6054
elif name == "ExtensionBlock":
6155
from pandas.core.internals.blocks import ExtensionBlock
6256

6357
return ExtensionBlock
64-
elif name == "Block":
58+
else:
6559
from pandas.core.internals.blocks import Block
6660

6761
return Block
68-
else:
69-
from pandas.core.internals.blocks import ObjectBlock
70-
71-
return ObjectBlock
7262

7363
raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'")

pandas/core/internals/blocks.py

-12
Original file line numberDiff line numberDiff line change
@@ -2148,18 +2148,6 @@ def is_numeric(self) -> bool: # type: ignore[override]
21482148
return kind in "fciub"
21492149

21502150

2151-
class NumericBlock(NumpyBlock):
2152-
# this Block type is kept for backwards-compatibility
2153-
# TODO(3.0): delete and remove deprecation in __init__.py.
2154-
__slots__ = ()
2155-
2156-
2157-
class ObjectBlock(NumpyBlock):
2158-
# this Block type is kept for backwards-compatibility
2159-
# TODO(3.0): delete and remove deprecation in __init__.py.
2160-
__slots__ = ()
2161-
2162-
21632151
class NDArrayBackedExtensionBlock(EABackedBlock):
21642152
"""
21652153
Block backed by an NDArrayBackedExtensionArray

pandas/core/methods/selectn.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ def compute(self, method: str) -> DataFrame:
213213
f"cannot use method {method!r} with this dtype"
214214
)
215215

216-
def get_indexer(current_indexer, other_indexer):
216+
def get_indexer(current_indexer: Index, other_indexer: Index) -> Index:
217217
"""
218218
Helper function to concat `current_indexer` and `other_indexer`
219219
depending on `method`

pandas/core/methods/to_dict.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,8 @@ def to_dict(
155155
stacklevel=find_stack_level(),
156156
)
157157
# GH16122
158-
into_c = com.standardize_mapping(into)
158+
# error: Call to untyped function "standardize_mapping" in typed context
159+
into_c = com.standardize_mapping(into) # type: ignore[no-untyped-call]
159160

160161
# error: Incompatible types in assignment (expression has type "str",
161162
# variable has type "Literal['dict', 'list', 'series', 'split', 'tight',

pandas/core/ops/invalid.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,26 @@
77
import operator
88
from typing import (
99
TYPE_CHECKING,
10+
Any,
1011
Callable,
1112
NoReturn,
1213
)
1314

1415
import numpy as np
1516

1617
if TYPE_CHECKING:
17-
from pandas._typing import npt
18+
from pandas._typing import (
19+
ArrayLike,
20+
Scalar,
21+
npt,
22+
)
1823

1924

20-
def invalid_comparison(left, right, op) -> npt.NDArray[np.bool_]:
25+
def invalid_comparison(
26+
left: ArrayLike,
27+
right: ArrayLike | Scalar,
28+
op: Callable[[Any, Any], bool],
29+
) -> npt.NDArray[np.bool_]:
2130
"""
2231
If a comparison has mismatched types and is not necessarily meaningful,
2332
follow python3 conventions by:
@@ -59,7 +68,7 @@ def make_invalid_op(name: str) -> Callable[..., NoReturn]:
5968
invalid_op : function
6069
"""
6170

62-
def invalid_op(self, other=None) -> NoReturn:
71+
def invalid_op(self: object, other: object = None) -> NoReturn:
6372
typ = type(self).__name__
6473
raise TypeError(f"cannot perform {name} with this index type: {typ}")
6574

pandas/io/common.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def stringify_path(
278278
return _expand_user(filepath_or_buffer)
279279

280280

281-
def urlopen(*args, **kwargs):
281+
def urlopen(*args: Any, **kwargs: Any) -> Any:
282282
"""
283283
Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of
284284
the stdlib.
@@ -972,7 +972,7 @@ def __init__(
972972
mode: Literal["r", "a", "w", "x"] = "r",
973973
fileobj: ReadBuffer[bytes] | WriteBuffer[bytes] | None = None,
974974
archive_name: str | None = None,
975-
**kwargs,
975+
**kwargs: Any,
976976
) -> None:
977977
super().__init__()
978978
self.archive_name = archive_name
@@ -1025,7 +1025,7 @@ def __init__(
10251025
file: FilePath | ReadBuffer[bytes] | WriteBuffer[bytes],
10261026
mode: str,
10271027
archive_name: str | None = None,
1028-
**kwargs,
1028+
**kwargs: Any,
10291029
) -> None:
10301030
super().__init__()
10311031
mode = mode.replace("b", "")

pandas/io/excel/_odswriter.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
)
1919

2020
if TYPE_CHECKING:
21+
from odf.opendocument import OpenDocumentSpreadsheet
22+
2123
from pandas._typing import (
2224
ExcelWriterIfSheetExists,
2325
FilePath,
@@ -37,12 +39,12 @@ def __init__(
3739
path: FilePath | WriteExcelBuffer | ExcelWriter,
3840
engine: str | None = None,
3941
date_format: str | None = None,
40-
datetime_format=None,
42+
datetime_format: str | None = None,
4143
mode: str = "w",
4244
storage_options: StorageOptions | None = None,
4345
if_sheet_exists: ExcelWriterIfSheetExists | None = None,
4446
engine_kwargs: dict[str, Any] | None = None,
45-
**kwargs,
47+
**kwargs: Any,
4648
) -> None:
4749
from odf.opendocument import OpenDocumentSpreadsheet
4850

@@ -63,7 +65,7 @@ def __init__(
6365
self._style_dict: dict[str, str] = {}
6466

6567
@property
66-
def book(self):
68+
def book(self) -> OpenDocumentSpreadsheet:
6769
"""
6870
Book instance of class odf.opendocument.OpenDocumentSpreadsheet.
6971
@@ -149,7 +151,7 @@ def _write_cells(
149151
for row_nr in range(max(rows.keys()) + 1):
150152
wks.addElement(rows[row_nr])
151153

152-
def _make_table_cell_attributes(self, cell) -> dict[str, int | str]:
154+
def _make_table_cell_attributes(self, cell: ExcelCell) -> dict[str, int | str]:
153155
"""Convert cell attributes to OpenDocument attributes
154156
155157
Parameters
@@ -171,7 +173,7 @@ def _make_table_cell_attributes(self, cell) -> dict[str, int | str]:
171173
attributes["numbercolumnsspanned"] = cell.mergeend
172174
return attributes
173175

174-
def _make_table_cell(self, cell) -> tuple[object, Any]:
176+
def _make_table_cell(self, cell: ExcelCell) -> tuple[object, Any]:
175177
"""Convert cell data to an OpenDocument spreadsheet cell
176178
177179
Parameters

pandas/io/formats/css.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ def _side_expander(prop_fmt: str) -> Callable:
3636
function: Return to call when a 'border(-{side}): {value}' string is encountered
3737
"""
3838

39-
def expand(self, prop: str, value: str) -> Generator[tuple[str, str], None, None]:
39+
def expand(
40+
self: CSSResolver, prop: str, value: str
41+
) -> Generator[tuple[str, str], None, None]:
4042
"""
4143
Expand shorthand property into side-specific property (top, right, bottom, left)
4244
@@ -81,7 +83,9 @@ def _border_expander(side: str = "") -> Callable:
8183
if side != "":
8284
side = f"-{side}"
8385

84-
def expand(self, prop: str, value: str) -> Generator[tuple[str, str], None, None]:
86+
def expand(
87+
self: CSSResolver, prop: str, value: str
88+
) -> Generator[tuple[str, str], None, None]:
8589
"""
8690
Expand border into color, style, and width tuples
8791
@@ -343,7 +347,9 @@ def _update_other_units(self, props: dict[str, str]) -> dict[str, str]:
343347
)
344348
return props
345349

346-
def size_to_pt(self, in_val, em_pt=None, conversions=UNIT_RATIOS) -> str:
350+
def size_to_pt(
351+
self, in_val: str, em_pt: float | None = None, conversions: dict = UNIT_RATIOS
352+
) -> str:
347353
def _error() -> str:
348354
warnings.warn(
349355
f"Unhandled size: {in_val!r}",

0 commit comments

Comments
 (0)