Skip to content

Commit b89f1d0

Browse files
mtsokol and mroeschke authored
MAINT: Adjust the codebase to the new np.array's copy keyword meaning (#57172)
* MAINT: Adjust the codebase to the new np.array copy keyword meaning
* Add copy to docstring
* Use asarray where possible

---------

Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 7988029 commit b89f1d0

33 files changed

+128
-58
lines changed

pandas/core/array_algos/quantile.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def quantile_with_mask(
102102
interpolation=interpolation,
103103
)
104104

105-
result = np.array(result, copy=False)
105+
result = np.asarray(result)
106106
result = result.T
107107

108108
return result
@@ -201,9 +201,9 @@ def _nanpercentile(
201201
]
202202
if values.dtype.kind == "f":
203203
# preserve itemsize
204-
result = np.array(result, dtype=values.dtype, copy=False).T
204+
result = np.asarray(result, dtype=values.dtype).T
205205
else:
206-
result = np.array(result, copy=False).T
206+
result = np.asarray(result).T
207207
if (
208208
result.dtype != values.dtype
209209
and not mask.all()

pandas/core/arrays/arrow/array.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,9 @@ def __arrow_array__(self, type=None):
659659
"""Convert myself to a pyarrow ChunkedArray."""
660660
return self._pa_array
661661

662-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
662+
def __array__(
663+
self, dtype: NpDtype | None = None, copy: bool | None = None
664+
) -> np.ndarray:
663665
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
664666
return self.to_numpy(dtype=dtype)
665667

pandas/core/arrays/base.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -725,7 +725,10 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
725725

726726
return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
727727

728-
return np.array(self, dtype=dtype, copy=copy)
728+
if not copy:
729+
return np.asarray(self, dtype=dtype)
730+
else:
731+
return np.array(self, dtype=dtype, copy=copy)
729732

730733
def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
731734
"""

pandas/core/arrays/categorical.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1659,7 +1659,9 @@ def _validate_codes_for_dtype(cls, codes, *, dtype: CategoricalDtype) -> np.ndar
16591659
# -------------------------------------------------------------
16601660

16611661
@ravel_compat
1662-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
1662+
def __array__(
1663+
self, dtype: NpDtype | None = None, copy: bool | None = None
1664+
) -> np.ndarray:
16631665
"""
16641666
The numpy array interface.
16651667
@@ -1668,6 +1670,9 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
16681670
dtype : np.dtype or None
16691671
Specifies the dtype for the array.
16701672
1673+
copy : bool or None, optional
1674+
Unused.
1675+
16711676
Returns
16721677
-------
16731678
numpy.array

pandas/core/arrays/datetimelike.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,9 @@ def _formatter(self, boxed: bool = False) -> Callable[[object], str]:
353353
# ----------------------------------------------------------------
354354
# Array-Like / EA-Interface Methods
355355

356-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
356+
def __array__(
357+
self, dtype: NpDtype | None = None, copy: bool | None = None
358+
) -> np.ndarray:
357359
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
358360
if is_object_dtype(dtype):
359361
return np.array(list(self), dtype=object)

pandas/core/arrays/datetimes.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -649,12 +649,12 @@ def _resolution_obj(self) -> Resolution:
649649
# ----------------------------------------------------------------
650650
# Array-Like / EA-Interface Methods
651651

652-
def __array__(self, dtype=None) -> np.ndarray:
652+
def __array__(self, dtype=None, copy=None) -> np.ndarray:
653653
if dtype is None and self.tz:
654654
# The default for tz-aware is object, to preserve tz info
655655
dtype = object
656656

657-
return super().__array__(dtype=dtype)
657+
return super().__array__(dtype=dtype, copy=copy)
658658

659659
def __iter__(self) -> Iterator:
660660
"""
@@ -2421,7 +2421,7 @@ def objects_to_datetime64(
24212421
assert errors in ["raise", "coerce"]
24222422

24232423
# if str-dtype, convert
2424-
data = np.array(data, copy=False, dtype=np.object_)
2424+
data = np.asarray(data, dtype=np.object_)
24252425

24262426
result, tz_parsed = tslib.array_to_datetime(
24272427
data,

pandas/core/arrays/interval.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1564,7 +1564,9 @@ def is_non_overlapping_monotonic(self) -> bool:
15641564
# ---------------------------------------------------------------------
15651565
# Conversion
15661566

1567-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
1567+
def __array__(
1568+
self, dtype: NpDtype | None = None, copy: bool | None = None
1569+
) -> np.ndarray:
15681570
"""
15691571
Return the IntervalArray's data as a numpy array of Interval
15701572
objects (with dtype='object')

pandas/core/arrays/masked.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -594,7 +594,9 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
594594

595595
__array_priority__ = 1000 # higher than ndarray so ops dispatch to us
596596

597-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
597+
def __array__(
598+
self, dtype: NpDtype | None = None, copy: bool | None = None
599+
) -> np.ndarray:
598600
"""
599601
the array interface, return my values
600602
We return an object array here to preserve our scalar values

pandas/core/arrays/numeric.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,10 @@ def _coerce_to_data_and_mask(
160160
return values, mask, dtype, inferred_type
161161

162162
original = values
163-
values = np.array(values, copy=copy)
163+
if not copy:
164+
values = np.asarray(values)
165+
else:
166+
values = np.array(values, copy=copy)
164167
inferred_type = None
165168
if values.dtype == object or is_string_dtype(values.dtype):
166169
inferred_type = lib.infer_dtype(values, skipna=True)
@@ -169,7 +172,10 @@ def _coerce_to_data_and_mask(
169172
raise TypeError(f"{values.dtype} cannot be converted to {name}")
170173

171174
elif values.dtype.kind == "b" and checker(dtype):
172-
values = np.array(values, dtype=default_dtype, copy=copy)
175+
if not copy:
176+
values = np.asarray(values, dtype=default_dtype)
177+
else:
178+
values = np.array(values, dtype=default_dtype, copy=copy)
173179

174180
elif values.dtype.kind not in "iuf":
175181
name = dtype_cls.__name__.strip("_")
@@ -208,9 +214,9 @@ def _coerce_to_data_and_mask(
208214
inferred_type not in ["floating", "mixed-integer-float"]
209215
and not mask.any()
210216
):
211-
values = np.array(original, dtype=dtype, copy=False)
217+
values = np.asarray(original, dtype=dtype)
212218
else:
213-
values = np.array(original, dtype="object", copy=False)
219+
values = np.asarray(original, dtype="object")
214220

215221
# we copy as need to coerce here
216222
if mask.any():

pandas/core/arrays/numpy_.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,9 @@ def dtype(self) -> NumpyEADtype:
150150
# ------------------------------------------------------------------------
151151
# NumPy Array Interface
152152

153-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
153+
def __array__(
154+
self, dtype: NpDtype | None = None, copy: bool | None = None
155+
) -> np.ndarray:
154156
return np.asarray(self._ndarray, dtype=dtype)
155157

156158
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):

pandas/core/arrays/period.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,10 @@ def __init__(
256256
raise raise_on_incompatible(values, dtype.freq)
257257
values, dtype = values._ndarray, values.dtype
258258

259-
values = np.array(values, dtype="int64", copy=copy)
259+
if not copy:
260+
values = np.asarray(values, dtype="int64")
261+
else:
262+
values = np.array(values, dtype="int64", copy=copy)
260263
if dtype is None:
261264
raise ValueError("dtype is not specified and cannot be inferred")
262265
dtype = cast(PeriodDtype, dtype)
@@ -400,7 +403,9 @@ def freq(self) -> BaseOffset:
400403
def freqstr(self) -> str:
401404
return PeriodDtype(self.freq)._freqstr
402405

403-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
406+
def __array__(
407+
self, dtype: NpDtype | None = None, copy: bool | None = None
408+
) -> np.ndarray:
404409
if dtype == "i8":
405410
return self.asi8
406411
elif dtype == bool:

pandas/core/arrays/sparse/array.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,9 @@ def from_spmatrix(cls, data: spmatrix) -> Self:
554554

555555
return cls._simple_new(arr, index, dtype)
556556

557-
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
557+
def __array__(
558+
self, dtype: NpDtype | None = None, copy: bool | None = None
559+
) -> np.ndarray:
558560
fill_value = self.fill_value
559561

560562
if self.sp_index.ngaps == 0:

pandas/core/arrays/timedeltas.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1072,7 +1072,10 @@ def sequence_to_td64ns(
10721072
# This includes datetime64-dtype, see GH#23539, GH#29794
10731073
raise TypeError(f"dtype {data.dtype} cannot be converted to timedelta64[ns]")
10741074

1075-
data = np.array(data, copy=copy)
1075+
if not copy:
1076+
data = np.asarray(data)
1077+
else:
1078+
data = np.array(data, copy=copy)
10761079

10771080
assert data.dtype.kind == "m"
10781081
assert data.dtype != "m8" # i.e. not unit-less
@@ -1152,7 +1155,7 @@ def _objects_to_td64ns(
11521155
higher level.
11531156
"""
11541157
# coerce Index to np.ndarray, converting string-dtype if necessary
1155-
values = np.array(data, dtype=np.object_, copy=False)
1158+
values = np.asarray(data, dtype=np.object_)
11561159

11571160
result = array_to_timedelta64(values, unit=unit, errors=errors)
11581161
return result.view("timedelta64[ns]")

pandas/core/construction.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,10 @@ def sanitize_array(
626626

627627
elif hasattr(data, "__array__"):
628628
# e.g. dask array GH#38645
629-
data = np.array(data, copy=copy)
629+
if not copy:
630+
data = np.asarray(data)
631+
else:
632+
data = np.array(data, copy=copy)
630633
return sanitize_array(
631634
data,
632635
index=index,
@@ -744,8 +747,11 @@ def _sanitize_str_dtypes(
744747
# GH#19853: If data is a scalar, result has already the result
745748
if not lib.is_scalar(data):
746749
if not np.all(isna(data)):
747-
data = np.array(data, dtype=dtype, copy=False)
748-
result = np.array(data, dtype=object, copy=copy)
750+
data = np.asarray(data, dtype=dtype)
751+
if not copy:
752+
result = np.asarray(data, dtype=object)
753+
else:
754+
result = np.array(data, dtype=object, copy=copy)
749755
return result
750756

751757

@@ -810,6 +816,8 @@ def _try_cast(
810816
# this will raise if we have e.g. floats
811817

812818
subarr = maybe_cast_to_integer_array(arr, dtype)
819+
elif not copy:
820+
subarr = np.asarray(arr, dtype=dtype)
813821
else:
814822
subarr = np.array(arr, dtype=dtype, copy=copy)
815823

pandas/core/dtypes/cast.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1503,7 +1503,10 @@ def construct_2d_arraylike_from_scalar(
15031503

15041504
# Attempt to coerce to a numpy array
15051505
try:
1506-
arr = np.array(value, dtype=dtype, copy=copy)
1506+
if not copy:
1507+
arr = np.asarray(value, dtype=dtype)
1508+
else:
1509+
arr = np.array(value, dtype=dtype, copy=copy)
15071510
except (ValueError, TypeError) as err:
15081511
raise TypeError(
15091512
f"DataFrame constructor called with incompatible data and dtype: {err}"
@@ -1652,7 +1655,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
16521655
"out-of-bound Python int",
16531656
DeprecationWarning,
16541657
)
1655-
casted = np.array(arr, dtype=dtype, copy=False)
1658+
casted = np.asarray(arr, dtype=dtype)
16561659
else:
16571660
with warnings.catch_warnings():
16581661
warnings.filterwarnings("ignore", category=RuntimeWarning)

pandas/core/dtypes/missing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,7 @@ def infer_fill_value(val):
564564
"""
565565
if not is_list_like(val):
566566
val = [val]
567-
val = np.array(val, copy=False)
567+
val = np.asarray(val)
568568
if val.dtype.kind in "mM":
569569
return np.array("NaT", dtype=val.dtype)
570570
elif val.dtype == object:

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1919,7 +1919,7 @@ def to_numpy(
19191919
dtype = np.dtype(dtype)
19201920
result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value)
19211921
if result.dtype is not dtype:
1922-
result = np.array(result, dtype=dtype, copy=False)
1922+
result = np.asarray(result, dtype=dtype)
19231923

19241924
return result
19251925

pandas/core/generic.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1989,7 +1989,9 @@ def empty(self) -> bool:
19891989
# GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
19901990
__array_priority__: int = 1000
19911991

1992-
def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
1992+
def __array__(
1993+
self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
1994+
) -> np.ndarray:
19931995
values = self._values
19941996
arr = np.asarray(values, dtype=dtype)
19951997
if astype_is_view(values.dtype, arr.dtype) and self._mgr.is_single_block:

pandas/core/indexes/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -912,7 +912,7 @@ def __len__(self) -> int:
912912
"""
913913
return len(self._data)
914914

915-
def __array__(self, dtype=None) -> np.ndarray:
915+
def __array__(self, dtype=None, copy=None) -> np.ndarray:
916916
"""
917917
The array interface, return my values.
918918
"""

pandas/core/indexes/multi.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -770,7 +770,7 @@ def _values(self) -> np.ndarray:
770770
):
771771
vals = vals.astype(object)
772772

773-
array_vals = np.array(vals, copy=False)
773+
array_vals = np.asarray(vals)
774774
array_vals = algos.take_nd(array_vals, codes, fill_value=index._na_value)
775775
values.append(array_vals)
776776

@@ -1330,7 +1330,7 @@ def copy( # type: ignore[override]
13301330
new_index._id = self._id
13311331
return new_index
13321332

1333-
def __array__(self, dtype=None) -> np.ndarray:
1333+
def __array__(self, dtype=None, copy=None) -> np.ndarray:
13341334
"""the array interface, return my values"""
13351335
return self.values
13361336

@@ -3357,7 +3357,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
33573357
locs = (level_codes >= idx.start) & (level_codes < idx.stop)
33583358
return locs
33593359

3360-
locs = np.array(level_codes == idx, dtype=bool, copy=False)
3360+
locs = np.asarray(level_codes == idx, dtype=bool)
33613361

33623362
if not locs.any():
33633363
# The label is present in self.levels[level] but unused:

pandas/core/internals/managers.py

+2
Original file line numberDiff line numberDiff line change
@@ -1824,6 +1824,8 @@ def as_array(
18241824
na_value=na_value,
18251825
copy=copy,
18261826
).reshape(blk.shape)
1827+
elif not copy:
1828+
arr = np.asarray(blk.values, dtype=dtype)
18271829
else:
18281830
arr = np.array(blk.values, dtype=dtype, copy=copy)
18291831

pandas/core/series.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -789,7 +789,9 @@ def __len__(self) -> int:
789789

790790
# ----------------------------------------------------------------------
791791
# NDArray Compat
792-
def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
792+
def __array__(
793+
self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
794+
) -> np.ndarray:
793795
"""
794796
Return the values as a NumPy array.
795797
@@ -802,6 +804,9 @@ def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
802804
The dtype to use for the resulting NumPy array. By default,
803805
the dtype is inferred from the data.
804806
807+
copy : bool or None, optional
808+
Unused.
809+
805810
Returns
806811
-------
807812
numpy.ndarray

pandas/io/pytables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4043,7 +4043,7 @@ def _create_axes(
40434043
if isinstance(data_converted.dtype, CategoricalDtype):
40444044
ordered = data_converted.ordered
40454045
meta = "category"
4046-
metadata = np.array(data_converted.categories, copy=False).ravel()
4046+
metadata = np.asarray(data_converted.categories).ravel()
40474047

40484048
data, dtype_name = _get_data_and_dtype_name(data_converted)
40494049

pandas/tests/arrays/integer/test_arithmetic.py

+1
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
197197
"Addition/subtraction of integers and integer-arrays with Timestamp",
198198
"has no kernel",
199199
"not implemented",
200+
"The 'out' kwarg is necessary. Use numpy.strings.multiply without it.",
200201
]
201202
)
202203
with pytest.raises(errs, match=msg):

0 commit comments

Comments
 (0)