diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index f7cec262ca302..8544671ab3702 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -116,12 +116,14 @@ cdef class IndexEngine:
 
         if self.is_monotonic_increasing:
             values = self._get_index_values()
-            try:
-                left = values.searchsorted(val, side='left')
-                right = values.searchsorted(val, side='right')
-            except TypeError:
-                # e.g. GH#29189 get_loc(None) with a Float64Index
-                raise KeyError(val)
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", category=FutureWarning)
+                try:
+                    left = values.searchsorted(val, side='left')
+                    right = values.searchsorted(val, side='right')
+                except TypeError:
+                    # e.g. GH#29189 get_loc(None) with a Float64Index
+                    raise KeyError(val)
 
             diff = right - left
             if diff == 0:
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index f8f5e5e05bc35..ae6e777d604dc 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -13,6 +13,7 @@
 )
 from warnings import (
     catch_warnings,
+    filterwarnings,
     simplefilter,
     warn,
 )
@@ -1583,7 +1584,9 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray:
     # and `value` is a pd.Timestamp, we may need to convert value
     arr = ensure_wrapped_if_datetimelike(arr)
 
-    return arr.searchsorted(value, side=side, sorter=sorter)
+    with catch_warnings():
+        filterwarnings("ignore", category=FutureWarning)
+        return arr.searchsorted(value, side=side, sorter=sorter)
 
 
 # ---- #
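Aside, not part of the patch: both hunks above wrap a searchsorted call in the standard-library warning-suppression idiom so that numpy's FutureWarning is not surfaced to callers. A minimal sketch of that idiom with a stand-in function (the name is illustrative, not pandas code):

    import warnings

    def call_ignoring_futurewarning(func, *args, **kwargs):
        # Suppress FutureWarning only for the duration of this call;
        # the previous warning filters are restored on exit.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=FutureWarning)
            return func(*args, **kwargs)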
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index ec69d9ccbdd90..539f7a784da5d 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from collections import abc
 from datetime import (
     datetime,
     time,
@@ -71,7 +72,10 @@
 from pandas.core.arrays._ranges import generate_regular_range
 from pandas.core.arrays.integer import IntegerArray
 import pandas.core.common as com
-from pandas.core.construction import extract_array
+from pandas.core.construction import (
+    create_ndarray,
+    extract_array,
+)
 
 from pandas.tseries.frequencies import get_period_alias
 from pandas.tseries.offsets import (
@@ -2012,10 +2016,10 @@ def sequence_to_dt64ns(
 
     if not hasattr(data, "dtype"):
         # e.g. list, tuple
-        if np.ndim(data) == 0:
+        if lib.is_iterator(data) or isinstance(data, (abc.KeysView, abc.ValuesView)):
             # i.e. generator
             data = list(data)
-        data = np.asarray(data)
+        data = create_ndarray(data, copy=False)
         copy = False
     elif isinstance(data, ABCMultiIndex):
         raise TypeError("Cannot create a DatetimeArray from a MultiIndex.")
@@ -2026,7 +2030,7 @@ def sequence_to_dt64ns(
         data = data.to_numpy("int64", na_value=iNaT)
     elif not isinstance(data, (np.ndarray, ExtensionArray)):
         # GH#24539 e.g. xarray, dask object
-        data = np.asarray(data)
+        data = create_ndarray(data)
 
     if isinstance(data, DatetimeArray):
         inferred_freq = data.freq
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
index e9d554200805e..959d7a3784bb0 100644
--- a/pandas/core/arrays/numpy_.py
+++ b/pandas/core/arrays/numpy_.py
@@ -23,7 +23,10 @@
 )
 from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
-from pandas.core.construction import ensure_wrapped_if_datetimelike
+from pandas.core.construction import (
+    create_ndarray,
+    ensure_wrapped_if_datetimelike,
+)
 from pandas.core.strings.object_array import ObjectStringArrayMixin
 
 
@@ -94,12 +97,11 @@ def _from_sequence(
         if isinstance(dtype, PandasDtype):
             dtype = dtype._dtype
 
-        # error: Argument "dtype" to "asarray" has incompatible type
-        # "Union[ExtensionDtype, str, dtype[Any], dtype[floating[_64Bit]], Type[object],
-        # None]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
-        # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
-        # _DTypeDict, Tuple[Any, Any]]]"
-        result = np.asarray(scalars, dtype=dtype)  # type: ignore[arg-type]
+        result = create_ndarray(
+            scalars,
+            dtype=dtype,  # type: ignore[arg-type]
+            copy=False,
+        )
         if (
             result.ndim > 1
             and not hasattr(scalars, "dtype")
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index ea87ac64cfe22..148ee79ae47b8 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from collections import abc
 from datetime import timedelta
 from typing import TYPE_CHECKING
 
@@ -66,7 +67,10 @@
 )
 from pandas.core.arrays._ranges import generate_regular_range
 import pandas.core.common as com
-from pandas.core.construction import extract_array
+from pandas.core.construction import (
+    create_ndarray,
+    extract_array,
+)
 from pandas.core.ops.common import unpack_zerodim_and_defer
 
 if TYPE_CHECKING:
@@ -965,10 +969,10 @@ def sequence_to_td64ns(
     # Unwrap whatever we have into a np.ndarray
     if not hasattr(data, "dtype"):
         # e.g. list, tuple
-        if np.ndim(data) == 0:
+        if lib.is_iterator(data) or isinstance(data, (abc.KeysView, abc.ValuesView)):
             # i.e. generator
             data = list(data)
-        data = np.array(data, copy=False)
+        data = create_ndarray(data, copy=False)
     elif isinstance(data, ABCMultiIndex):
         raise TypeError("Cannot create a DatetimeArray from a MultiIndex.")
     else:
@@ -978,7 +982,7 @@ def sequence_to_td64ns(
         data = data.to_numpy("int64", na_value=iNaT)
     elif not isinstance(data, (np.ndarray, ExtensionArray)):
         # GH#24539 e.g. xarray, dask object
-        data = np.asarray(data)
+        data = create_ndarray(data, copy=False)
     elif isinstance(data, ABCCategorical):
         data = data.categories.take(data.codes, fill_value=NaT)._values
         copy = False
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 04ff2d2c4618f..c6f31e9fe67d5 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -129,7 +129,7 @@ def is_bool_indexer(key: Any) -> bool:
         is_array_like(key) and is_extension_array_dtype(key.dtype)
     ):
         if key.dtype == np.object_:
-            key = np.asarray(key)
+            key = np.asarray(key, dtype=object)
 
             if not lib.is_bool_array(key):
                 na_msg = "Cannot mask with non-boolean array containing NA / NaN values"
@@ -142,8 +142,10 @@ def is_bool_indexer(key: Any) -> bool:
     elif is_bool_dtype(key.dtype):
         return True
     elif isinstance(key, list):
+        from pandas.core.construction import create_ndarray
+
         try:
-            arr = np.asarray(key)
+            arr = create_ndarray(key, copy=False)
             return arr.dtype == np.bool_ and len(arr) == len(key)
         except TypeError:  # pragma: no cover
             return False
@@ -221,6 +223,8 @@ def count_not_none(*args) -> int:
 
 
 def asarray_tuplesafe(values, dtype: NpDtype | None = None) -> np.ndarray:
+    if dtype is not None:
+        dtype = np.dtype(dtype)
 
     if not (isinstance(values, (list, tuple)) or hasattr(values, "__array__")):
         values = list(values)
@@ -229,15 +233,12 @@ def asarray_tuplesafe(values, dtype: NpDtype | None = None) -> np.ndarray:
        # error: Incompatible return value type (got "Union[ExtensionArray, ndarray]",
        # expected "ndarray")
        return values._values  # type: ignore[return-value]
 
-    # error: Non-overlapping container check (element type: "Union[str, dtype[Any],
-    # None]", container item type: "type")
-    if isinstance(values, list) and dtype in [  # type: ignore[comparison-overlap]
-        np.object_,
-        object,
-    ]:
+    if isinstance(values, list) and dtype == np.dtype("object"):
         return construct_1d_object_array_from_listlike(values)
 
-    result = np.asarray(values, dtype=dtype)
+    from pandas.core.construction import create_ndarray
+
+    result = create_ndarray(values, dtype=dtype, copy=False)
     if issubclass(result.dtype.type, str):
         result = np.asarray(values, dtype=object)
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index 188bb64932de0..aa75d6540edd2 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -12,6 +12,7 @@
     Sequence,
     cast,
 )
+import warnings
 
 import numpy as np
 import numpy.ma as ma
@@ -815,3 +816,20 @@
     return Series(
         data=data, index=index, dtype=dtype, name=name, copy=copy, fastpath=fastpath
     )
+
+
+def create_ndarray(
+    obj, *, dtype: np.dtype | None = None, copy: bool = True
+) -> np.ndarray:
+    """
+    Call np.array when we do not know the outcome dtype.
+    """
+    if dtype is not None:
+        return np.array(obj, dtype=dtype, copy=copy)
+    try:
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=FutureWarning)
+            out = np.array(obj, copy=copy)
+    except (TypeError, ValueError):
+        out = np.array(obj, dtype=object, copy=copy)
+    return out
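Aside, not part of the patch: a rough sketch of how the new create_ndarray helper defined above would behave once this change is applied. The helper is internal, so the import below only resolves inside pandas at this revision:

    import numpy as np

    from pandas.core.construction import create_ndarray

    # With an explicit dtype the helper is a plain np.array call.
    ints = create_ndarray([1, 2, 3], dtype=np.dtype("int64"), copy=False)

    # Without a dtype, numpy infers one while FutureWarning is silenced;
    # input that np.array rejects with TypeError/ValueError is retried
    # with dtype=object so construction still succeeds.
    floats = create_ndarray([1.5, 2.5], copy=False)
    print(ints.dtype, floats.dtype)  # int64 float64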
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 40883dd8f747b..16f15234d7167 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -882,7 +882,9 @@ def maybe_infer_dtype_type(element):
     if hasattr(element, "dtype"):
         tipo = element.dtype
     elif is_list_like(element):
-        element = np.asarray(element)
+        from pandas.core.construction import create_ndarray
+
+        element = create_ndarray(element, copy=False)
         tipo = element.dtype
     return tipo
 
@@ -1608,8 +1610,9 @@ def maybe_cast_to_datetime(
 
         if is_datetime64 or is_datetime64tz:
             dtype = ensure_nanosecond_dtype(dtype)
+            from pandas.core.construction import create_ndarray
 
-            value = np.array(value, copy=False)
+            value = create_ndarray(value, copy=False)
 
             # we have an array of datetime or timedeltas & nulls
             if value.size or not is_dtype_equal(value.dtype, dtype):
@@ -2009,11 +2012,17 @@ def construct_1d_ndarray_preserving_na(
             values, dtype, copy=copy  # type: ignore[arg-type]
         )
     else:
+        from pandas.core.construction import create_ndarray
+
         # error: Argument "dtype" to "array" has incompatible type
         # "Union[dtype[Any], ExtensionDtype, None]"; expected "Union[dtype[Any],
         # None, type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any,
         # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
-        subarr = np.array(values, dtype=dtype, copy=copy)  # type: ignore[arg-type]
+        subarr = create_ndarray(
+            values,
+            dtype=dtype,  # type: ignore[arg-type]
+            copy=copy,
+        )
 
     return subarr
 
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
index de7c522b4fbec..1f6b89b51d003 100644
--- a/pandas/core/indexes/numeric.py
+++ b/pandas/core/indexes/numeric.py
@@ -38,6 +38,7 @@
 from pandas.core.dtypes.generic import ABCSeries
 
 import pandas.core.common as com
+from pandas.core.construction import create_ndarray
 from pandas.core.indexes.base import (
     Index,
     maybe_extract_name,
@@ -152,7 +153,7 @@ def _ensure_array(cls, data, dtype, copy: bool):
             if not isinstance(data, (ABCSeries, list, tuple)):
                 data = list(data)
 
-            data = np.asarray(data, dtype=dtype)
+            data = create_ndarray(data, dtype=dtype, copy=False)
 
         if issubclass(data.dtype.type, str):
             cls._string_data_error(data)
@@ -160,7 +161,7 @@ def _ensure_array(cls, data, dtype, copy: bool):
         dtype = cls._ensure_dtype(dtype)
 
         if copy or not is_dtype_equal(data.dtype, dtype):
-            subarr = np.array(data, dtype=dtype, copy=copy)
+            subarr = create_ndarray(data, dtype=dtype, copy=copy)
             cls._assert_safe_casting(data, subarr)
         else:
             subarr = data
@@ -169,7 +170,7 @@ def _ensure_array(cls, data, dtype, copy: bool):
             # GH#13601, GH#20285, GH#27125
             raise ValueError("Index data must be 1-dimensional")
 
-        subarr = np.asarray(subarr)
+        subarr = create_ndarray(subarr, copy=False)
         return subarr
 
     @classmethod
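Aside, not part of the patch: the indexing.py hunk below stops calling np.ndim on arbitrary values, because np.ndim falls back to asarray for objects without an ndim attribute and can therefore warn or fail on ragged input. Conceptually the new condition is the predicate sketched here (the helper name is illustrative only; create_ndarray is the function added in pandas/core/construction.py above):

    def _value_is_2d(value) -> bool:
        # Trust an existing ndim attribute; only coerce when there is none,
        # going through create_ndarray so ragged input is handled quietly.
        if hasattr(value, "ndim"):
            return value.ndim == 2
        return create_ndarray(value, copy=False).ndim == 2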
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index be5b89f08b5ca..152eb16d36f20 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -42,7 +42,10 @@
 )
 
 import pandas.core.common as com
-from pandas.core.construction import array as pd_array
+from pandas.core.construction import (
+    array as pd_array,
+    create_ndarray,
+)
 from pandas.core.indexers import (
     check_array_indexer,
     is_empty_indexer,
@@ -1712,7 +1715,10 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
 
         if isinstance(value, ABCDataFrame):
             self._setitem_with_indexer_frame_value(indexer, value, name)
-        elif np.ndim(value) == 2:
+        elif (hasattr(value, "ndim") and value.ndim == 2) or (
+            not hasattr(value, "ndim")
+            and create_ndarray(value, copy=False).ndim == 2
+        ):
             self._setitem_with_indexer_2d_value(indexer, value)
 
         elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
@@ -1763,7 +1769,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str):
             for loc in ilocs:
                 self._setitem_single_column(loc, value, pi)
 
-    def _setitem_with_indexer_2d_value(self, indexer, value):
+    def _setitem_with_indexer_2d_value(self, indexer, value: np.ndarray) -> None:
         # We get here with np.ndim(value) == 2, excluding DataFrame,
         # which goes through _setitem_with_indexer_frame_value
         pi = indexer[0]
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 31e32b053367b..ff0902710fadc 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -421,7 +421,7 @@ def _convert(arr):
         return self.apply(_convert)
 
     def replace(self: T, value, **kwargs) -> T:
-        assert np.ndim(value) == 0, value
+        assert not lib.is_list_like(value)
         # TODO "replace" is right now implemented on the blocks, we should move
         # it to general array algos so it can be reused here
         return self.apply_with_block("replace", value=value, **kwargs)
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 4f1b16e747394..487cbdacde0a9 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -102,6 +102,7 @@
 import pandas.core.common as com
 import pandas.core.computation.expressions as expressions
 from pandas.core.construction import (
+    create_ndarray,
     ensure_wrapped_if_datetimelike,
     extract_array,
 )
@@ -933,7 +934,7 @@ def setitem(self, indexer, value):
             arr_value = value
         else:
             is_ea_value = False
-            arr_value = np.asarray(value)
+            arr_value = create_ndarray(value, copy=False)
 
         if transpose:
             values = values.T
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 323aa45874d96..7604ed23813a2 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -410,7 +410,7 @@ def convert(
         )
 
     def replace(self: T, to_replace, value, inplace: bool, regex: bool) -> T:
-        assert np.ndim(value) == 0, value
+        assert not is_list_like(value)
         return self.apply(
             "replace", to_replace=to_replace, value=value, inplace=inplace, regex=regex
         )
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index b32eb9e308780..004f73ef97324 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -84,7 +84,10 @@
     PyTablesExpr,
     maybe_expression,
 )
-from pandas.core.construction import extract_array
+from pandas.core.construction import (
+    create_ndarray,
+    extract_array,
+)
 from pandas.core.indexes.api import ensure_index
 from pandas.core.internals import BlockManager
 
@@ -3854,11 +3857,16 @@ def _create_axes(
 
         if table_exists:
             indexer = len(new_non_index_axes)  # i.e. 0
            exist_axis = self.non_index_axes[indexer][1]
-            if not array_equivalent(np.array(append_axis), np.array(exist_axis)):
+
+            if not array_equivalent(
+                create_ndarray(append_axis, copy=True),
+                create_ndarray(exist_axis, copy=True),
+            ):
 
                # ahah! -> reindex
                if array_equivalent(
-                    np.array(sorted(append_axis)), np.array(sorted(exist_axis))
+                    create_ndarray(sorted(append_axis), copy=True),
+                    create_ndarray(sorted(exist_axis), copy=True),
                 ):
                     append_axis = exist_axis
diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py
index 2c96cf291c154..ce356fbe59d12 100644
--- a/pandas/tests/frame/methods/test_to_records.py
+++ b/pandas/tests/frame/methods/test_to_records.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+from pandas.compat import is_numpy_dev
+
 from pandas import (
     CategoricalDtype,
     DataFrame,
@@ -171,20 +173,26 @@ def test_to_records_with_categorical(self):
             ),
         ),
         # Pass in a type instance.
-        (
+        pytest.param(
             {"column_dtypes": str},
             np.rec.array(
                 [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
                 dtype=[("index", "