Skip to content

Commit babee91

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents c4eb7b1 + 2001798 commit babee91

File tree

26 files changed

+245
-225
lines changed

26 files changed

+245
-225
lines changed

.github/workflows/python-dev.yml

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ on:
88
pull_request:
99
branches:
1010
- master
11+
- 1.3.x
1112
paths-ignore:
1213
- "doc/**"
1314

doc/source/whatsnew/v1.3.1.rst

+5-3
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Pandas could not be built on PyPy (:issue:`42355`)
18-
- :class:`DataFrame` constructed with with an older version of pandas could not be unpickled (:issue:`42345`)
19-
- Performance regression in constructing a :class:`DataFrame` from a dictionary of dictionaries (:issue:`42338`)
18+
- :class:`DataFrame` constructed with an older version of pandas could not be unpickled (:issue:`42345`)
19+
- Performance regression in constructing a :class:`DataFrame` from a dictionary of dictionaries (:issue:`42248`)
2020
- Fixed regression in :meth:`DataFrame.agg` dropping values when the DataFrame had an Extension Array dtype, a duplicate index, and ``axis=1`` (:issue:`42380`)
21+
- Fixed regression in :meth:`DataFrame.astype` changing the order of noncontiguous data (:issue:`42396`)
22+
- Performance regression in :class:`DataFrame` in reduction operations requiring casting such as :meth:`DataFrame.mean` on integer data (:issue:`38592`)
2123
- Performance regression in :meth:`DataFrame.to_dict` and :meth:`Series.to_dict` when the ``orient`` argument is one of "records", "dict", or "split" (:issue:`42352`)
22-
- Fixed regression in indexing with a ``list`` subclass incorrectly raising ``TypeError`` (:issue:`42433`, :issue:42461`)
24+
- Fixed regression in indexing with a ``list`` subclass incorrectly raising ``TypeError`` (:issue:`42433`, :issue:`42461`)
2325
-
2426

2527
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.4.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ Other enhancements
3131
^^^^^^^^^^^^^^^^^^
3232
- Add support for assigning values to ``by`` argument in :meth:`DataFrame.plot.hist` and :meth:`DataFrame.plot.box` (:issue:`15079`)
3333
- :meth:`Series.sample`, :meth:`DataFrame.sample`, and :meth:`.GroupBy.sample` now accept a ``np.random.Generator`` as input to ``random_state``. A generator will be more performant, especially with ``replace=False`` (:issue:`38100`)
34-
- Additional options added to :meth:`.Styler.bar` to control alignment and display (:issue:`26070`)
34+
- Additional options added to :meth:`.Styler.bar` to control alignment and display, with keyword only arguments (:issue:`26070`, :issue:`36419`)
35+
- :meth:`.Styler.bar` now validates the input arguments ``width`` and ``height`` (:issue:`42511`)
3536
- :meth:`Series.ewm` and :meth:`DataFrame.ewm` now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
3637
-
3738

pandas/core/algorithms.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
ArrayLike,
2828
DtypeObj,
2929
Scalar,
30+
npt,
3031
)
3132
from pandas.util._decorators import doc
3233

@@ -528,7 +529,7 @@ def factorize_array(
528529
size_hint: int | None = None,
529530
na_value=None,
530531
mask: np.ndarray | None = None,
531-
) -> tuple[np.ndarray, np.ndarray]:
532+
) -> tuple[npt.NDArray[np.intp], np.ndarray]:
532533
"""
533534
Factorize an array-like to codes and uniques.
534535

pandas/core/arrays/categorical.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
Ordered,
3838
Scalar,
3939
Shape,
40+
npt,
4041
type_t,
4142
)
4243
from pandas.compat.numpy import function as nv
@@ -2048,7 +2049,7 @@ def _validate_setitem_value(self, value):
20482049
codes = self.categories.get_indexer(rvalue)
20492050
return codes.astype(self._ndarray.dtype, copy=False)
20502051

2051-
def _reverse_indexer(self) -> dict[Hashable, np.ndarray]:
2052+
def _reverse_indexer(self) -> dict[Hashable, npt.NDArray[np.intp]]:
20522053
"""
20532054
Compute the inverse of a categorical, returning
20542055
a dict of categories -> indexers.

pandas/core/dtypes/cast.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from typing import (
1515
TYPE_CHECKING,
1616
Any,
17-
Literal,
1817
Sized,
1918
TypeVar,
2019
cast,
@@ -1093,14 +1092,11 @@ def astype_nansafe(
10931092
The dtype was a datetime64/timedelta64 dtype, but it had no unit.
10941093
"""
10951094
if arr.ndim > 1:
1096-
# Make sure we are doing non-copy ravel and reshape.
1097-
flags = arr.flags
1098-
flat = arr.ravel("K")
1095+
flat = arr.ravel()
10991096
result = astype_nansafe(flat, dtype, copy=copy, skipna=skipna)
1100-
order: Literal["C", "F"] = "F" if flags.f_contiguous else "C"
11011097
# error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no
11021098
# attribute "reshape"
1103-
return result.reshape(arr.shape, order=order) # type: ignore[union-attr]
1099+
return result.reshape(arr.shape) # type: ignore[union-attr]
11041100

11051101
# We get here with 0-dim from sparse
11061102
arr = np.atleast_1d(arr)

pandas/core/generic.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -10499,6 +10499,7 @@ def mad(self, axis=None, skipna=None, level=None):
1049910499
name1=name1,
1050010500
name2=name2,
1050110501
axis_descr=axis_descr,
10502+
notes="",
1050210503
)
1050310504
def sem(
1050410505
self,
@@ -10520,6 +10521,7 @@ def sem(
1052010521
name1=name1,
1052110522
name2=name2,
1052210523
axis_descr=axis_descr,
10524+
notes="",
1052310525
)
1052410526
def var(
1052510527
self,
@@ -10542,6 +10544,7 @@ def var(
1054210544
name1=name1,
1054310545
name2=name2,
1054410546
axis_descr=axis_descr,
10547+
notes=_std_notes,
1054510548
)
1054610549
def std(
1054710550
self,
@@ -11034,12 +11037,16 @@ def _doc_params(cls):
1103411037
1103511038
Returns
1103611039
-------
11037-
{name1} or {name2} (if level specified)
11040+
{name1} or {name2} (if level specified) \
11041+
{notes}
11042+
"""
11043+
11044+
_std_notes = """
1103811045
1103911046
Notes
1104011047
-----
1104111048
To have the same behaviour as `numpy.std`, use `ddof=0` (instead of the
11042-
default `ddof=1`)\n"""
11049+
default `ddof=1`)"""
1104311050

1104411051
_bool_doc = """
1104511052
{desc}

pandas/core/groupby/ops.py

+47-80
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
FrameOrSeries,
3333
Shape,
3434
final,
35+
npt,
3536
)
3637
from pandas.errors import AbstractMethodError
3738
from pandas.util._decorators import cache_readonly
@@ -341,95 +342,54 @@ def _ea_wrap_cython_operation(
341342
comp_ids=comp_ids,
342343
**kwargs,
343344
)
344-
orig_values = values
345345

346-
if isinstance(orig_values, (DatetimeArray, PeriodArray)):
346+
if isinstance(values, (DatetimeArray, PeriodArray, TimedeltaArray)):
347347
# All of the functions implemented here are ordinal, so we can
348348
# operate on the tz-naive equivalents
349-
npvalues = orig_values._ndarray.view("M8[ns]")
350-
res_values = self._cython_op_ndim_compat(
351-
npvalues,
352-
min_count=min_count,
353-
ngroups=ngroups,
354-
comp_ids=comp_ids,
355-
mask=None,
356-
**kwargs,
357-
)
358-
if self.how in ["rank"]:
359-
# i.e. how in WrappedCythonOp.cast_blocklist, since
360-
# other cast_blocklist methods dont go through cython_operation
361-
# preserve float64 dtype
362-
return res_values
363-
364-
res_values = res_values.view("i8")
365-
result = type(orig_values)(res_values, dtype=orig_values.dtype)
366-
return result
367-
368-
elif isinstance(orig_values, TimedeltaArray):
369-
# We have an ExtensionArray but not ExtensionDtype
370-
res_values = self._cython_op_ndim_compat(
371-
orig_values._ndarray,
372-
min_count=min_count,
373-
ngroups=ngroups,
374-
comp_ids=comp_ids,
375-
mask=None,
376-
**kwargs,
377-
)
378-
if self.how in ["rank"]:
379-
# i.e. how in WrappedCythonOp.cast_blocklist, since
380-
# other cast_blocklist methods dont go through cython_operation
381-
# preserve float64 dtype
382-
return res_values
383-
384-
# otherwise res_values has the same dtype as original values
385-
return type(orig_values)(res_values)
386-
349+
npvalues = values._ndarray.view("M8[ns]")
387350
elif isinstance(values.dtype, (BooleanDtype, _IntegerDtype)):
388351
# IntegerArray or BooleanArray
389352
npvalues = values.to_numpy("float64", na_value=np.nan)
390-
res_values = self._cython_op_ndim_compat(
391-
npvalues,
392-
min_count=min_count,
393-
ngroups=ngroups,
394-
comp_ids=comp_ids,
395-
mask=None,
396-
**kwargs,
397-
)
398-
if self.how in ["rank"]:
399-
# i.e. how in WrappedCythonOp.cast_blocklist, since
400-
# other cast_blocklist methods dont go through cython_operation
401-
return res_values
402-
403-
dtype = self._get_result_dtype(orig_values.dtype)
404-
cls = dtype.construct_array_type()
405-
return cls._from_sequence(res_values, dtype=dtype)
406-
407353
elif isinstance(values.dtype, FloatingDtype):
408354
# FloatingArray
409-
npvalues = values.to_numpy(
410-
values.dtype.numpy_dtype,
411-
na_value=np.nan,
412-
)
413-
res_values = self._cython_op_ndim_compat(
414-
npvalues,
415-
min_count=min_count,
416-
ngroups=ngroups,
417-
comp_ids=comp_ids,
418-
mask=None,
419-
**kwargs,
355+
npvalues = values.to_numpy(values.dtype.numpy_dtype, na_value=np.nan)
356+
else:
357+
raise NotImplementedError(
358+
f"function is not implemented for this dtype: {values.dtype}"
420359
)
421-
if self.how in ["rank"]:
422-
# i.e. how in WrappedCythonOp.cast_blocklist, since
423-
# other cast_blocklist methods dont go through cython_operation
424-
return res_values
425360

426-
dtype = self._get_result_dtype(orig_values.dtype)
361+
res_values = self._cython_op_ndim_compat(
362+
npvalues,
363+
min_count=min_count,
364+
ngroups=ngroups,
365+
comp_ids=comp_ids,
366+
mask=None,
367+
**kwargs,
368+
)
369+
370+
if self.how in ["rank"]:
371+
# i.e. how in WrappedCythonOp.cast_blocklist, since
372+
# other cast_blocklist methods don't go through cython_operation
373+
return res_values
374+
375+
return self._reconstruct_ea_result(values, res_values)
376+
377+
def _reconstruct_ea_result(self, values, res_values):
378+
"""
379+
Construct an ExtensionArray result from an ndarray result.
380+
"""
381+
# TODO: allow EAs to override this logic
382+
383+
if isinstance(values.dtype, (BooleanDtype, _IntegerDtype, FloatingDtype)):
384+
dtype = self._get_result_dtype(values.dtype)
427385
cls = dtype.construct_array_type()
428386
return cls._from_sequence(res_values, dtype=dtype)
429387

430-
raise NotImplementedError(
431-
f"function is not implemented for this dtype: {values.dtype}"
432-
)
388+
elif needs_i8_conversion(values.dtype):
389+
i8values = res_values.view("i8")
390+
return type(values)(i8values, dtype=values.dtype)
391+
392+
raise NotImplementedError
433393

434394
@final
435395
def _masked_ea_wrap_cython_operation(
@@ -478,6 +438,8 @@ def _cython_op_ndim_compat(
478438
if values.ndim == 1:
479439
# expand to 2d, dispatch, then squeeze if appropriate
480440
values2d = values[None, :]
441+
if mask is not None:
442+
mask = mask[None, :]
481443
res = self._call_cython_op(
482444
values2d,
483445
min_count=min_count,
@@ -533,9 +495,8 @@ def _call_cython_op(
533495
values = ensure_float64(values)
534496

535497
values = values.T
536-
537498
if mask is not None:
538-
mask = mask.reshape(values.shape, order="C")
499+
mask = mask.T
539500

540501
out_shape = self._get_output_shape(ngroups, values)
541502
func, values = self.get_cython_func_and_vals(values, is_numeric)
@@ -677,7 +638,7 @@ def __init__(
677638
sort: bool = True,
678639
group_keys: bool = True,
679640
mutated: bool = False,
680-
indexer: np.ndarray | None = None,
641+
indexer: npt.NDArray[np.intp] | None = None,
681642
dropna: bool = True,
682643
):
683644
assert isinstance(axis, Index), axis
@@ -1268,7 +1229,13 @@ def _is_indexed_like(obj, axes, axis: int) -> bool:
12681229

12691230

12701231
class DataSplitter(Generic[FrameOrSeries]):
1271-
def __init__(self, data: FrameOrSeries, labels, ngroups: int, axis: int = 0):
1232+
def __init__(
1233+
self,
1234+
data: FrameOrSeries,
1235+
labels: npt.NDArray[np.intp],
1236+
ngroups: int,
1237+
axis: int = 0,
1238+
):
12721239
self.data = data
12731240
self.labels = ensure_platform_int(labels) # _should_ already be np.intp
12741241
self.ngroups = ngroups

0 commit comments

Comments
 (0)