diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c0ed198e200f1..6e73e1636a75b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -39,7 +39,6 @@ is_period_dtype, is_scalar, is_signed_integer_dtype, - is_sparse, is_timedelta64_dtype, is_unsigned_integer_dtype, needs_i8_conversion, @@ -743,7 +742,7 @@ def value_counts( else: - if is_extension_array_dtype(values) or is_sparse(values): + if is_extension_array_dtype(values): # handle Categorical and sparse, result = Series(values)._values.value_counts(dropna=dropna) @@ -1623,7 +1622,7 @@ def take_nd( out : ndarray or None, default None Optional output array, must be appropriate type to hold input and fill_value together, if indexer has any -1 value entries; call - _maybe_promote to determine this type for any fill_value + maybe_promote to determine this type for any fill_value fill_value : any, default np.nan Fill value to replace -1 values with mask_info : tuple of (ndarray, boolean) @@ -1644,9 +1643,7 @@ def take_nd( if is_extension_array_dtype(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) - if is_sparse(arr): - arr = arr.to_dense() - elif isinstance(arr, (ABCIndexClass, ABCSeries)): + if isinstance(arr, (ABCIndexClass, ABCSeries)): arr = arr._values arr = np.asarray(arr) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 5bd2a2b69deb1..5e8b28267f24f 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -9,7 +9,7 @@ import numpy as np import numpy.ma as ma -from pandas._libs import lib, tslibs +from pandas._libs import lib from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime from pandas.core.dtypes.cast import ( @@ -36,7 +36,7 @@ is_timedelta64_ns_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import ExtensionDtype, registry +from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype, registry from pandas.core.dtypes.generic import ( ABCExtensionArray, 
ABCIndexClass, @@ -275,7 +275,7 @@ def array( if inferred_dtype == "period": try: return period_array(data, copy=copy) - except tslibs.IncompatibleFrequency: + except IncompatibleFrequency: # We may have a mixture of frequencies. # We choose to return an ndarray, rather than raising. pass @@ -365,7 +365,9 @@ def extract_array(obj, extract_numpy=False): return obj -def sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False): +def sanitize_array( + data, index, dtype=None, copy: bool = False, raise_cast_failure: bool = False +): """ Sanitize input data to an ndarray, copy if specified, coerce to the dtype if specified. @@ -486,13 +488,19 @@ def sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False return subarr -def _try_cast(arr, dtype, copy, raise_cast_failure): +def _try_cast( + arr, + dtype: Optional[Union[np.dtype, "ExtensionDtype"]], + copy: bool, + raise_cast_failure: bool, +): """ Convert input to numpy ndarray and optionally cast to a given dtype. Parameters ---------- - arr : array-like + arr : ndarray, list, tuple, iterator (catchall) + Excludes: ExtensionArray, Series, Index. dtype : np.dtype, ExtensionDtype or None copy : bool If False, don't copy the data if not needed. @@ -528,11 +536,13 @@ def _try_cast(arr, dtype, copy, raise_cast_failure): if is_categorical_dtype(dtype): # We *do* allow casting to categorical, since we know # that Categorical is the only array type for 'category'. 
+ dtype = cast(CategoricalDtype, dtype) subarr = dtype.construct_array_type()( arr, dtype.categories, ordered=dtype._ordered ) elif is_extension_array_dtype(dtype): # create an extension array from its dtype + dtype = cast(ExtensionDtype, dtype) array_type = dtype.construct_array_type()._from_sequence subarr = array_type(arr, dtype=dtype, copy=copy) elif dtype is not None and raise_cast_failure: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e31918c21c2ac..b59660056aadb 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1311,9 +1311,8 @@ def construct_1d_ndarray_preserving_na(values, dtype=None, copy=False): >>> np.array([1.0, 2.0, None], dtype='str') array(['1.0', '2.0', 'None'], dtype='<U4') - >>> construct_1d_ndarray_preserving_na([1.0, 2.0, None], dtype='str') - - + >>> construct_1d_ndarray_preserving_na([1.0, 2.0, None], dtype=np.dtype('str')) + array(['1.0', '2.0', None], dtype=object) """ subarr = np.array(values, dtype=dtype, copy=copy) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 4ea649a2a6faf..41677af7b1721 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -888,7 +888,8 @@ def is_dtype_equal(source, target): def is_any_int_dtype(arr_or_dtype) -> bool: - """Check whether the provided array or dtype is of an integer dtype. + """ + Check whether the provided array or dtype is of an integer dtype. In this function, timedelta64 instances are also considered "any-integer" type objects and will return True. diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 01399a23e810e..04c3b2b7714ef 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2128,7 +2128,8 @@ def _can_hold_na(self): return True def _maybe_coerce_values(self, values): - """Input validation for values passed to __init__. Ensure that + """ + Input validation for values passed to __init__. 
Ensure that we have datetime64ns, coercing if necessary. Parameters