Skip to content

Commit 9f93fa4

Browse files
committed
Merge branch 'main' into np-array-copy-keyword
2 parents f66cd05 + 1bf86a3 commit 9f93fa4

40 files changed

+268
-49
lines changed

doc/source/whatsnew/v3.0.0.rst

+5-2
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ Removal of prior version deprecations/changes
244244

245245
Performance improvements
246246
~~~~~~~~~~~~~~~~~~~~~~~~
247+
- :meth:`Series.str.extract` returns :class:`RangeIndex` columns instead of :class:`Index` columns when possible (:issue:`57542`)
247248
- Performance improvement in :class:`DataFrame` when ``data`` is a ``dict`` and ``columns`` is specified (:issue:`24368`)
248249
- Performance improvement in :meth:`DataFrame.join` for sorted but non-unique indexes (:issue:`56941`)
249250
- Performance improvement in :meth:`DataFrame.join` when left and/or right are non-unique and ``how`` is ``"left"``, ``"right"``, or ``"inner"`` (:issue:`56817`)
@@ -252,11 +253,11 @@ Performance improvements
252253
- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
253254
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
254255
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
256+
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57588`)
255257
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
256258
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57445`)
257-
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
258-
- :meth:`Series.str.extract` returns a :class:`RangeIndex` columns instead of an :class:`Index` column when possible (:issue:`?``)
259259
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
260+
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
260261

261262
.. ---------------------------------------------------------------------------
262263
.. _whatsnew_300.bug_fixes:
@@ -265,6 +266,7 @@ Bug fixes
265266
~~~~~~~~~
266267
- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
267268
- Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
269+
- Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
268270
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
269271

270272
Categorical
@@ -324,6 +326,7 @@ MultiIndex
324326

325327
I/O
326328
^^^
329+
- Bug in :meth:`DataFrame.to_excel` when writing an empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
327330
-
328331
-
329332

pandas/_libs/src/vendored/ujson/python/objToJSON.c

+1-3
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ typedef struct __NpyArrContext {
7474
npy_intp ndim;
7575
npy_intp index[NPY_MAXDIMS];
7676
int type_num;
77-
PyArray_GetItemFunc *getitem;
7877

7978
char **rowLabels;
8079
char **columnLabels;
@@ -405,7 +404,6 @@ static void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) {
405404
}
406405

407406
npyarr->array = (PyObject *)obj;
408-
npyarr->getitem = (PyArray_GetItemFunc *)PyArray_DESCR(obj)->f->getitem;
409407
npyarr->dataptr = PyArray_DATA(obj);
410408
npyarr->ndim = PyArray_NDIM(obj) - 1;
411409
npyarr->curdim = 0;
@@ -492,7 +490,7 @@ static int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) {
492490
((PyObjectEncoder *)tc->encoder)->npyValue = npyarr->dataptr;
493491
((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = npyarr;
494492
} else {
495-
GET_TC(tc)->itemValue = npyarr->getitem(npyarr->dataptr, npyarr->array);
493+
GET_TC(tc)->itemValue = PyArray_GETITEM(arrayobj, npyarr->dataptr);
496494
}
497495

498496
npyarr->dataptr += npyarr->stride;

pandas/core/array_algos/quantile.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
import numpy as np
66

7+
from pandas.compat.numpy import np_version_gt2
8+
79
from pandas.core.dtypes.missing import (
810
isna,
911
na_value_for_dtype,
@@ -102,7 +104,8 @@ def quantile_with_mask(
102104
interpolation=interpolation,
103105
)
104106

105-
result = np.array(result, copy=False)
107+
copy_false = None if np_version_gt2 else False
108+
result = np.array(result, copy=copy_false)
106109
result = result.T
107110

108111
return result
@@ -199,11 +202,12 @@ def _nanpercentile(
199202
_nanpercentile_1d(val, m, qs, na_value, interpolation=interpolation)
200203
for (val, m) in zip(list(values), list(mask))
201204
]
205+
copy_false = None if np_version_gt2 else False
202206
if values.dtype.kind == "f":
203207
# preserve itemsize
204-
result = np.array(result, dtype=values.dtype, copy=False).T
208+
result = np.array(result, dtype=values.dtype, copy=copy_false).T
205209
else:
206-
result = np.array(result, copy=False).T
210+
result = np.array(result, copy=copy_false).T
207211
if (
208212
result.dtype != values.dtype
209213
and not mask.all()

pandas/core/arrays/arrow/array.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,9 @@ def __arrow_array__(self, type=None):
659659
"""Convert myself to a pyarrow ChunkedArray."""
660660
return self._pa_array
661661

662-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
662+
def __array__(
663+
self, dtype: NpDtype | None = None, copy: bool | None = None
664+
) -> np.ndarray:
663665
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
664666
return self.to_numpy(dtype=dtype)
665667

pandas/core/arrays/base.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@
2727
lib,
2828
)
2929
from pandas.compat import set_function_name
30-
from pandas.compat.numpy import function as nv
30+
from pandas.compat.numpy import (
31+
function as nv,
32+
np_version_gt2,
33+
)
3134
from pandas.errors import AbstractMethodError
3235
from pandas.util._decorators import (
3336
Appender,
@@ -710,6 +713,8 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
710713
return self
711714
else:
712715
return self.copy()
716+
if np_version_gt2 and not copy:
717+
copy = None
713718

714719
if isinstance(dtype, ExtensionDtype):
715720
cls = dtype.construct_array_type()

pandas/core/arrays/categorical.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1659,7 +1659,9 @@ def _validate_codes_for_dtype(cls, codes, *, dtype: CategoricalDtype) -> np.ndar
16591659
# -------------------------------------------------------------
16601660

16611661
@ravel_compat
1662-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
1662+
def __array__(
1663+
self, dtype: NpDtype | None = None, copy: bool | None = None
1664+
) -> np.ndarray:
16631665
"""
16641666
The numpy array interface.
16651667

pandas/core/arrays/datetimelike.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,9 @@ def _formatter(self, boxed: bool = False) -> Callable[[object], str]:
353353
# ----------------------------------------------------------------
354354
# Array-Like / EA-Interface Methods
355355

356-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
356+
def __array__(
357+
self, dtype: NpDtype | None = None, copy: bool | None = None
358+
) -> np.ndarray:
357359
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
358360
if is_object_dtype(dtype):
359361
return np.array(list(self), dtype=object)

pandas/core/arrays/datetimes.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
tzconversion,
4444
)
4545
from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
46+
from pandas.compat.numpy import np_version_gt2
4647
from pandas.errors import PerformanceWarning
4748
from pandas.util._exceptions import find_stack_level
4849
from pandas.util._validators import validate_inclusive
@@ -649,12 +650,12 @@ def _resolution_obj(self) -> Resolution:
649650
# ----------------------------------------------------------------
650651
# Array-Like / EA-Interface Methods
651652

652-
def __array__(self, dtype=None) -> np.ndarray:
653+
def __array__(self, dtype=None, copy=None) -> np.ndarray:
653654
if dtype is None and self.tz:
654655
# The default for tz-aware is object, to preserve tz info
655656
dtype = object
656657

657-
return super().__array__(dtype=dtype)
658+
return super().__array__(dtype=dtype, copy=copy)
658659

659660
def __iter__(self) -> Iterator:
660661
"""
@@ -2421,7 +2422,8 @@ def objects_to_datetime64(
24212422
assert errors in ["raise", "coerce"]
24222423

24232424
# if str-dtype, convert
2424-
data = np.array(data, copy=False, dtype=np.object_)
2425+
copy_false = None if np_version_gt2 else False
2426+
data = np.array(data, dtype=np.object_, copy=copy_false)
24252427

24262428
result, tz_parsed = tslib.array_to_datetime(
24272429
data,

pandas/core/arrays/interval.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1564,7 +1564,9 @@ def is_non_overlapping_monotonic(self) -> bool:
15641564
# ---------------------------------------------------------------------
15651565
# Conversion
15661566

1567-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
1567+
def __array__(
1568+
self, dtype: NpDtype | None = None, copy: bool | None = None
1569+
) -> np.ndarray:
15681570
"""
15691571
Return the IntervalArray's data as a numpy array of Interval
15701572
objects (with dtype='object')

pandas/core/arrays/masked.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,9 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
594594

595595
__array_priority__ = 1000 # higher than ndarray so ops dispatch to us
596596

597-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
597+
def __array__(
598+
self, dtype: NpDtype | None = None, copy: bool | None = None
599+
) -> np.ndarray:
598600
"""
599601
the array interface, return my values
600602
We return an object array here to preserve our scalar values

pandas/core/arrays/numeric.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
lib,
1414
missing as libmissing,
1515
)
16+
from pandas.compat.numpy import np_version_gt2
1617
from pandas.errors import AbstractMethodError
1718
from pandas.util._decorators import cache_readonly
1819

@@ -137,6 +138,12 @@ def _coerce_to_data_and_mask(
137138
values, dtype, copy: bool, dtype_cls: type[NumericDtype], default_dtype: np.dtype
138139
):
139140
checker = dtype_cls._checker
141+
if np_version_gt2:
142+
copy_false = None
143+
if not copy:
144+
copy = None
145+
else:
146+
copy_false = False
140147

141148
mask = None
142149
inferred_type = None
@@ -208,9 +215,9 @@ def _coerce_to_data_and_mask(
208215
inferred_type not in ["floating", "mixed-integer-float"]
209216
and not mask.any()
210217
):
211-
values = np.array(original, dtype=dtype, copy=False)
218+
values = np.array(original, dtype=dtype, copy=copy_false)
212219
else:
213-
values = np.array(original, dtype="object", copy=False)
220+
values = np.array(original, dtype="object", copy=copy_false)
214221

215222
# we copy as need to coerce here
216223
if mask.any():

pandas/core/arrays/numpy_.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,9 @@ def dtype(self) -> NumpyEADtype:
150150
# ------------------------------------------------------------------------
151151
# NumPy Array Interface
152152

153-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
153+
def __array__(
154+
self, dtype: NpDtype | None = None, copy: bool | None = None
155+
) -> np.ndarray:
154156
return np.asarray(self._ndarray, dtype=dtype)
155157

156158
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):

pandas/core/arrays/period.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
get_period_field_arr,
5151
period_asfreq_arr,
5252
)
53+
from pandas.compat.numpy import np_version_gt2
5354
from pandas.util._decorators import (
5455
cache_readonly,
5556
doc,
@@ -243,6 +244,9 @@ def __init__(
243244
if not isinstance(dtype, PeriodDtype):
244245
raise ValueError(f"Invalid dtype {dtype} for PeriodArray")
245246

247+
if np_version_gt2 and not copy:
248+
copy = None
249+
246250
if isinstance(values, ABCSeries):
247251
values = values._values
248252
if not isinstance(values, type(self)):
@@ -400,7 +404,9 @@ def freq(self) -> BaseOffset:
400404
def freqstr(self) -> str:
401405
return PeriodDtype(self.freq)._freqstr
402406

403-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
407+
def __array__(
408+
self, dtype: NpDtype | None = None, copy: bool | None = None
409+
) -> np.ndarray:
404410
if dtype == "i8":
405411
return self.asi8
406412
elif dtype == bool:

pandas/core/arrays/sparse/array.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,9 @@ def from_spmatrix(cls, data: spmatrix) -> Self:
554554

555555
return cls._simple_new(arr, index, dtype)
556556

557-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
557+
def __array__(
558+
self, dtype: NpDtype | None = None, copy: bool | None = None
559+
) -> np.ndarray:
558560
fill_value = self.fill_value
559561

560562
if self.sp_index.ngaps == 0:

pandas/core/arrays/timedeltas.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@
3636
parse_timedelta_unit,
3737
truediv_object_array,
3838
)
39-
from pandas.compat.numpy import function as nv
39+
from pandas.compat.numpy import (
40+
function as nv,
41+
np_version_gt2,
42+
)
4043
from pandas.util._validators import validate_endpoints
4144

4245
from pandas.core.dtypes.common import (
@@ -1072,6 +1075,9 @@ def sequence_to_td64ns(
10721075
# This includes datetime64-dtype, see GH#23539, GH#29794
10731076
raise TypeError(f"dtype {data.dtype} cannot be converted to timedelta64[ns]")
10741077

1078+
copy_false = None if np_version_gt2 else False
1079+
if not copy:
1080+
copy = copy_false
10751081
data = np.array(data, copy=copy)
10761082

10771083
assert data.dtype.kind == "m"
@@ -1152,7 +1158,8 @@ def _objects_to_td64ns(
11521158
higher level.
11531159
"""
11541160
# coerce Index to np.ndarray, converting string-dtype if necessary
1155-
values = np.array(data, dtype=np.object_, copy=False)
1161+
copy_false = None if np_version_gt2 else False
1162+
values = np.array(data, dtype=np.object_, copy=copy_false)
11561163

11571164
result = array_to_timedelta64(values, unit=unit, errors=errors)
11581165
return result.view("timedelta64[ns]")

pandas/core/construction.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
DtypeObj,
3535
T,
3636
)
37+
from pandas.compat.numpy import np_version_gt2
3738
from pandas.util._exceptions import find_stack_level
3839

3940
from pandas.core.dtypes.base import ExtensionDtype
@@ -626,6 +627,8 @@ def sanitize_array(
626627

627628
elif hasattr(data, "__array__"):
628629
# e.g. dask array GH#38645
630+
if np_version_gt2 and not copy:
631+
copy = None
629632
data = np.array(data, copy=copy)
630633
return sanitize_array(
631634
data,
@@ -735,6 +738,9 @@ def _sanitize_str_dtypes(
735738
"""
736739
Ensure we have a dtype that is supported by pandas.
737740
"""
741+
copy_false = None if np_version_gt2 else False
742+
if not copy:
743+
copy = copy_false
738744

739745
# This is to prevent mixed-type Series getting all casted to
740746
# NumPy string type, e.g. NaN --> '-1#IND'.
@@ -744,7 +750,7 @@ def _sanitize_str_dtypes(
744750
# GH#19853: If data is a scalar, result has already the result
745751
if not lib.is_scalar(data):
746752
if not np.all(isna(data)):
747-
data = np.array(data, dtype=dtype, copy=False)
753+
data = np.array(data, dtype=dtype, copy=copy_false)
748754
result = np.array(data, dtype=object, copy=copy)
749755
return result
750756

@@ -781,6 +787,8 @@ def _try_cast(
781787
np.ndarray or ExtensionArray
782788
"""
783789
is_ndarray = isinstance(arr, np.ndarray)
790+
if np_version_gt2 and not copy:
791+
copy = None
784792

785793
if dtype == object:
786794
if not is_ndarray:

pandas/core/dtypes/cast.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1491,6 +1491,9 @@ def construct_2d_arraylike_from_scalar(
14911491
value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool
14921492
) -> np.ndarray:
14931493
shape = (length, width)
1494+
copy_false = None if np_version_gt2 else False
1495+
if not copy:
1496+
copy = copy_false
14941497

14951498
if dtype.kind in "mM":
14961499
value = _maybe_box_and_unbox_datetimelike(value, dtype)
@@ -1652,7 +1655,8 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
16521655
"out-of-bound Python int",
16531656
DeprecationWarning,
16541657
)
1655-
casted = np.array(arr, dtype=dtype, copy=False)
1658+
copy_false = None if np_version_gt2 else False
1659+
casted = np.array(arr, dtype=dtype, copy=copy_false)
16561660
else:
16571661
with warnings.catch_warnings():
16581662
warnings.filterwarnings("ignore", category=RuntimeWarning)

0 commit comments

Comments
 (0)