Skip to content

MAINT: Adjust the codebase to the new np.array's copy keyword meaning #57172

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 5, 2024
6 changes: 3 additions & 3 deletions pandas/core/array_algos/quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def quantile_with_mask(
interpolation=interpolation,
)

result = np.array(result, copy=False)
result = np.asarray(result)
result = result.T

return result
Expand Down Expand Up @@ -201,9 +201,9 @@ def _nanpercentile(
]
if values.dtype.kind == "f":
# preserve itemsize
result = np.array(result, dtype=values.dtype, copy=False).T
result = np.asarray(result, dtype=values.dtype).T
else:
result = np.array(result, copy=False).T
result = np.asarray(result).T
if (
result.dtype != values.dtype
and not mask.all()
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,9 @@ def __arrow_array__(self, type=None):
"""Convert myself to a pyarrow ChunkedArray."""
return self._pa_array

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
"""Correctly construct numpy arrays when passed to `np.asarray()`."""
return self.to_numpy(dtype=dtype)

Expand Down
5 changes: 4 additions & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,10 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:

return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)

return np.array(self, dtype=dtype, copy=copy)
if not copy:
return np.asarray(self, dtype=dtype)
else:
return np.array(self, dtype=dtype, copy=copy)

def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
"""
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1659,7 +1659,9 @@ def _validate_codes_for_dtype(cls, codes, *, dtype: CategoricalDtype) -> np.ndar
# -------------------------------------------------------------

@ravel_compat
def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
"""
The numpy array interface.

Expand All @@ -1668,6 +1670,9 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
dtype : np.dtype or None
Specifies the dtype for the array.

copy : bool or None, optional
Unused.

Returns
-------
numpy.array
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,9 @@ def _formatter(self, boxed: bool = False) -> Callable[[object], str]:
# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
# used for Timedelta/DatetimeArray, overwritten by PeriodArray
if is_object_dtype(dtype):
return np.array(list(self), dtype=object)
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,12 +649,12 @@ def _resolution_obj(self) -> Resolution:
# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

def __array__(self, dtype=None) -> np.ndarray:
def __array__(self, dtype=None, copy=None) -> np.ndarray:
if dtype is None and self.tz:
# The default for tz-aware is object, to preserve tz info
dtype = object

return super().__array__(dtype=dtype)
return super().__array__(dtype=dtype, copy=copy)

def __iter__(self) -> Iterator:
"""
Expand Down Expand Up @@ -2421,7 +2421,7 @@ def objects_to_datetime64(
assert errors in ["raise", "coerce"]

# if str-dtype, convert
data = np.array(data, copy=False, dtype=np.object_)
data = np.asarray(data, dtype=np.object_)

result, tz_parsed = tslib.array_to_datetime(
data,
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1564,7 +1564,9 @@ def is_non_overlapping_monotonic(self) -> bool:
# ---------------------------------------------------------------------
# Conversion

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
"""
Return the IntervalArray's data as a numpy array of Interval
objects (with dtype='object')
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,9 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:

__array_priority__ = 1000 # higher than ndarray so ops dispatch to us

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
"""
the array interface, return my values
We return an object array here to preserve our scalar values
Expand Down
14 changes: 10 additions & 4 deletions pandas/core/arrays/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,10 @@ def _coerce_to_data_and_mask(
return values, mask, dtype, inferred_type

original = values
values = np.array(values, copy=copy)
if not copy:
values = np.asarray(values)
else:
values = np.array(values, copy=copy)
inferred_type = None
if values.dtype == object or is_string_dtype(values.dtype):
inferred_type = lib.infer_dtype(values, skipna=True)
Expand All @@ -169,7 +172,10 @@ def _coerce_to_data_and_mask(
raise TypeError(f"{values.dtype} cannot be converted to {name}")

elif values.dtype.kind == "b" and checker(dtype):
values = np.array(values, dtype=default_dtype, copy=copy)
if not copy:
values = np.asarray(values, dtype=default_dtype)
else:
values = np.array(values, dtype=default_dtype, copy=copy)

elif values.dtype.kind not in "iuf":
name = dtype_cls.__name__.strip("_")
Expand Down Expand Up @@ -208,9 +214,9 @@ def _coerce_to_data_and_mask(
inferred_type not in ["floating", "mixed-integer-float"]
and not mask.any()
):
values = np.array(original, dtype=dtype, copy=False)
values = np.asarray(original, dtype=dtype)
else:
values = np.array(original, dtype="object", copy=False)
values = np.asarray(original, dtype="object")

# we copy as need to coerce here
if mask.any():
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ def dtype(self) -> NumpyEADtype:
# ------------------------------------------------------------------------
# NumPy Array Interface

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
return np.asarray(self._ndarray, dtype=dtype)

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,10 @@ def __init__(
raise raise_on_incompatible(values, dtype.freq)
values, dtype = values._ndarray, values.dtype

values = np.array(values, dtype="int64", copy=copy)
if not copy:
values = np.asarray(values, dtype="int64")
else:
values = np.array(values, dtype="int64", copy=copy)
if dtype is None:
raise ValueError("dtype is not specified and cannot be inferred")
dtype = cast(PeriodDtype, dtype)
Expand Down Expand Up @@ -400,7 +403,9 @@ def freq(self) -> BaseOffset:
def freqstr(self) -> str:
return PeriodDtype(self.freq)._freqstr

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
if dtype == "i8":
return self.asi8
elif dtype == bool:
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,9 @@ def from_spmatrix(cls, data: spmatrix) -> Self:

return cls._simple_new(arr, index, dtype)

def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
fill_value = self.fill_value

if self.sp_index.ngaps == 0:
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1072,7 +1072,10 @@ def sequence_to_td64ns(
# This includes datetime64-dtype, see GH#23539, GH#29794
raise TypeError(f"dtype {data.dtype} cannot be converted to timedelta64[ns]")

data = np.array(data, copy=copy)
if not copy:
data = np.asarray(data)
else:
data = np.array(data, copy=copy)

assert data.dtype.kind == "m"
assert data.dtype != "m8" # i.e. not unit-less
Expand Down Expand Up @@ -1152,7 +1155,7 @@ def _objects_to_td64ns(
higher level.
"""
# coerce Index to np.ndarray, converting string-dtype if necessary
values = np.array(data, dtype=np.object_, copy=False)
values = np.asarray(data, dtype=np.object_)

result = array_to_timedelta64(values, unit=unit, errors=errors)
return result.view("timedelta64[ns]")
Expand Down
14 changes: 11 additions & 3 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,10 @@ def sanitize_array(

elif hasattr(data, "__array__"):
# e.g. dask array GH#38645
data = np.array(data, copy=copy)
if not copy:
data = np.asarray(data)
else:
data = np.array(data, copy=copy)
return sanitize_array(
data,
index=index,
Expand Down Expand Up @@ -744,8 +747,11 @@ def _sanitize_str_dtypes(
# GH#19853: If data is a scalar, result has already the result
if not lib.is_scalar(data):
if not np.all(isna(data)):
data = np.array(data, dtype=dtype, copy=False)
result = np.array(data, dtype=object, copy=copy)
data = np.asarray(data, dtype=dtype)
if not copy:
result = np.asarray(data, dtype=object)
else:
result = np.array(data, dtype=object, copy=copy)
return result


Expand Down Expand Up @@ -810,6 +816,8 @@ def _try_cast(
# this will raise if we have e.g. floats

subarr = maybe_cast_to_integer_array(arr, dtype)
elif not copy:
subarr = np.asarray(arr, dtype=dtype)
else:
subarr = np.array(arr, dtype=dtype, copy=copy)

Expand Down
7 changes: 5 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1503,7 +1503,10 @@ def construct_2d_arraylike_from_scalar(

# Attempt to coerce to a numpy array
try:
arr = np.array(value, dtype=dtype, copy=copy)
if not copy:
arr = np.asarray(value, dtype=dtype)
else:
arr = np.array(value, dtype=dtype, copy=copy)
except (ValueError, TypeError) as err:
raise TypeError(
f"DataFrame constructor called with incompatible data and dtype: {err}"
Expand Down Expand Up @@ -1652,7 +1655,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n
"out-of-bound Python int",
DeprecationWarning,
)
casted = np.array(arr, dtype=dtype, copy=False)
casted = np.asarray(arr, dtype=dtype)
else:
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ def infer_fill_value(val):
"""
if not is_list_like(val):
val = [val]
val = np.array(val, copy=False)
val = np.asarray(val)
if val.dtype.kind in "mM":
return np.array("NaT", dtype=val.dtype)
elif val.dtype == object:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1919,7 +1919,7 @@ def to_numpy(
dtype = np.dtype(dtype)
result = self._mgr.as_array(dtype=dtype, copy=copy, na_value=na_value)
if result.dtype is not dtype:
result = np.array(result, dtype=dtype, copy=False)
result = np.asarray(result, dtype=dtype)

return result

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1989,7 +1989,9 @@ def empty(self) -> bool:
# GH#23114 Ensure ndarray.__op__(DataFrame) returns NotImplemented
__array_priority__: int = 1000

def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
def __array__(
self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
) -> np.ndarray:
values = self._values
arr = np.asarray(values, dtype=dtype)
if astype_is_view(values.dtype, arr.dtype) and self._mgr.is_single_block:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -912,7 +912,7 @@ def __len__(self) -> int:
"""
return len(self._data)

def __array__(self, dtype=None) -> np.ndarray:
def __array__(self, dtype=None, copy=None) -> np.ndarray:
"""
The array interface, return my values.
"""
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ def _values(self) -> np.ndarray:
):
vals = vals.astype(object)

array_vals = np.array(vals, copy=False)
array_vals = np.asarray(vals)
array_vals = algos.take_nd(array_vals, codes, fill_value=index._na_value)
values.append(array_vals)

Expand Down Expand Up @@ -1330,7 +1330,7 @@ def copy( # type: ignore[override]
new_index._id = self._id
return new_index

def __array__(self, dtype=None) -> np.ndarray:
def __array__(self, dtype=None, copy=None) -> np.ndarray:
"""the array interface, return my values"""
return self.values

Expand Down Expand Up @@ -3357,7 +3357,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
locs = (level_codes >= idx.start) & (level_codes < idx.stop)
return locs

locs = np.array(level_codes == idx, dtype=bool, copy=False)
locs = np.asarray(level_codes == idx, dtype=bool)

if not locs.any():
# The label is present in self.levels[level] but unused:
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1824,6 +1824,8 @@ def as_array(
na_value=na_value,
copy=copy,
).reshape(blk.shape)
elif not copy:
arr = np.asarray(blk.values, dtype=dtype)
else:
arr = np.array(blk.values, dtype=dtype, copy=copy)

Expand Down
7 changes: 6 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -789,7 +789,9 @@ def __len__(self) -> int:

# ----------------------------------------------------------------------
# NDArray Compat
def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
def __array__(
self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
) -> np.ndarray:
"""
Return the values as a NumPy array.

Expand All @@ -802,6 +804,9 @@ def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
The dtype to use for the resulting NumPy array. By default,
the dtype is inferred from the data.

copy : bool or None, optional
Unused.

Returns
-------
numpy.ndarray
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4043,7 +4043,7 @@ def _create_axes(
if isinstance(data_converted.dtype, CategoricalDtype):
ordered = data_converted.ordered
meta = "category"
metadata = np.array(data_converted.categories, copy=False).ravel()
metadata = np.asarray(data_converted.categories).ravel()

data, dtype_name = _get_data_and_dtype_name(data_converted)

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/arrays/integer/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string
"Addition/subtraction of integers and integer-arrays with Timestamp",
"has no kernel",
"not implemented",
"The 'out' kwarg is necessary. Use numpy.strings.multiply without it.",
]
)
with pytest.raises(errs, match=msg):
Expand Down
Loading