diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 7be06fe92c418..25939bcdc7c6a 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1732,8 +1732,8 @@ def _round(self, freq, mode, ambiguous, nonexistent): values = self.view("i8") values = cast(np.ndarray, values) nanos = to_offset(freq).nanos - result = round_nsint64(values, mode, nanos) - result = self._maybe_mask_results(result, fill_value=iNaT) + result_i8 = round_nsint64(values, mode, nanos) + result = self._maybe_mask_results(result_i8, fill_value=iNaT) result = result.view(self._ndarray.dtype) return self._simple_new(result, dtype=self.dtype) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index ae44acf06591f..3f5c550545aad 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -371,14 +371,14 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: if isinstance(dtype, ExtensionDtype): return super().astype(dtype, copy=copy) + na_value: float | np.datetime64 | lib.NoDefault + # coerce if is_float_dtype(dtype): # In astype, we consider dtype=float to also mean na_value=np.nan na_value = np.nan elif is_datetime64_dtype(dtype): - # error: Incompatible types in assignment (expression has type - # "datetime64", variable has type "float") - na_value = np.datetime64("NaT") # type: ignore[assignment] + na_value = np.datetime64("NaT") else: na_value = lib.no_default diff --git a/pandas/core/construction.py b/pandas/core/construction.py index b5a17e1ef882e..98dfad72142f6 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -52,10 +52,10 @@ is_integer_dtype, is_list_like, is_object_dtype, - is_sparse, is_string_dtype, is_timedelta64_ns_dtype, ) +from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ( ABCExtensionArray, ABCIndex, @@ -549,12 +549,10 @@ def sanitize_array( subarr = _sanitize_ndim(subarr, data, dtype, index) - if not (is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype)): - # error: Argument 1 to "_sanitize_str_dtypes" has incompatible type - # "ExtensionArray"; expected "ndarray" - subarr = _sanitize_str_dtypes( - subarr, data, dtype, copy # type: ignore[arg-type] - ) + if not ( + isinstance(subarr.dtype, ExtensionDtype) or isinstance(dtype, ExtensionDtype) + ): + subarr = _sanitize_str_dtypes(subarr, data, dtype, copy) is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(dtype) if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype: @@ -599,7 +597,7 @@ def _sanitize_ndim( def _sanitize_str_dtypes( - result: np.ndarray, data, dtype: Optional[DtypeObj], copy: bool + result: np.ndarray, data, dtype: Optional[np.dtype], copy: bool ) -> np.ndarray: """ Ensure we have a dtype that is supported by pandas. @@ -613,11 +611,7 @@ def _sanitize_str_dtypes( # GH#19853: If data is a scalar, result has already the result if not lib.is_scalar(data): if not np.all(isna(data)): - # error: Argument "dtype" to "array" has incompatible type - # "Union[dtype[Any], ExtensionDtype, None]"; expected "Union[dtype[Any], - # None, type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, - # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" - data = np.array(data, dtype=dtype, copy=False) # type: ignore[arg-type] + data = np.array(data, dtype=dtype, copy=False) result = np.array(data, dtype=object, copy=copy) return result @@ -666,7 +660,7 @@ def _try_cast( ): return arr - if isinstance(dtype, ExtensionDtype) and (dtype.kind != "M" or is_sparse(dtype)): + if isinstance(dtype, ExtensionDtype) and not isinstance(dtype, DatetimeTZDtype): # create an extension array from its dtype # DatetimeTZ case needs to go through maybe_cast_to_datetime but # SparseDtype does not diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3f3d9f9f2833b..a3744ffa7f9bc 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1400,11 +1400,13 @@ def soft_convert_objects( # GH 20380, when datetime is beyond year 2262, hence outside # bound of nanosecond-resolution 64-bit integers. try: - values = lib.maybe_convert_objects( + converted = lib.maybe_convert_objects( values, convert_datetime=datetime, convert_timedelta=timedelta ) except (OutOfBoundsDatetime, ValueError): return values + if converted is not values: + return converted if numeric and is_object_dtype(values.dtype): converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) @@ -1446,10 +1448,9 @@ def convert_dtypes( dtype new dtype """ - is_extension = is_extension_array_dtype(input_array.dtype) if ( convert_string or convert_integer or convert_boolean or convert_floating - ) and not is_extension: + ) and isinstance(input_array, np.ndarray): inferred_dtype = lib.infer_dtype(input_array) if not convert_string and is_string_dtype(inferred_dtype): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 484b01f2c04f0..d1d1993931062 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3300,14 +3300,10 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: ) else: - # error: Incompatible types in assignment (expression has type - # "ndarray", variable has type "List[Any]") - new_values = self.values.T # type: ignore[assignment] + new_arr = self.values.T if copy: - new_values = new_values.copy() - result = self._constructor( - new_values, index=self.columns, columns=self.index - ) + new_arr = new_arr.copy() + result = self._constructor(new_arr, index=self.columns, columns=self.index) return result.__finalize__(self, method="transpose") @@ -3682,17 +3678,15 @@ def _set_item_frame_value(self, key, value: DataFrame) -> None: value = value.reindex(cols, axis=1) # now align rows + arraylike = _reindex_for_setitem(value, self.index) + self._set_item_mgr(key, arraylike) - # error: Incompatible types in assignment (expression has type "ExtensionArray", - # variable has type "DataFrame") - value = _reindex_for_setitem(value, self.index) # type: ignore[assignment] - self._set_item_mgr(key, value) - - def _iset_item_mgr(self, loc: int, value) -> None: + def _iset_item_mgr(self, loc: int | slice | np.ndarray, value) -> None: + # when called from _set_item_mgr loc can be anything returned from get_loc self._mgr.iset(loc, value) self._clear_item_cache() - def _set_item_mgr(self, key, value): + def _set_item_mgr(self, key, value: ArrayLike) -> None: try: loc = self._info_axis.get_loc(key) except KeyError: @@ -3707,9 +3701,9 @@ def _set_item_mgr(self, key, value): if len(self): self._check_setitem_copy() - def _iset_item(self, loc: int, value): - value = self._sanitize_column(value) - self._iset_item_mgr(loc, value) + def _iset_item(self, loc: int, value) -> None: + arraylike = self._sanitize_column(value) + self._iset_item_mgr(loc, arraylike) # check if we are modifying a copy # try to set first as we want an invalid diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6b4e3c7caef50..25c10c215e8cc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3541,7 +3541,8 @@ def _maybe_cache_changed(self, item, value) -> None: The object has called back to us saying maybe it has changed. """ loc = self._info_axis.get_loc(item) - self._mgr.iset(loc, value) + arraylike = value._values + self._mgr.iset(loc, arraylike) @final @property diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 294d1fd078b08..d432b7ef443cc 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -850,7 +850,7 @@ def idelete(self, indexer): self._axes = [self._axes[0], self._axes[1][to_keep]] return self - def iset(self, loc: Union[int, slice, np.ndarray], value): + def iset(self, loc: Union[int, slice, np.ndarray], value: ArrayLike): """ Set new column(s). @@ -861,12 +861,10 @@ def iset(self, loc: Union[int, slice, np.ndarray], value): ---------- loc : integer, slice or boolean mask Positional location (already bounds checked) - value : array-like + value : np.ndarray or ExtensionArray """ # single column -> single integer index if lib.is_integer(loc): - # TODO the extract array should in theory not be needed? - value = extract_array(value, extract_numpy=True) # TODO can we avoid needing to unpack this here? That means converting # DataFrame into 1D array when loc is an integer @@ -904,7 +902,10 @@ def iset(self, loc: Union[int, slice, np.ndarray], value): assert value.shape[0] == len(self._axes[0]) for value_idx, mgr_idx in enumerate(indices): - value_arr = value[:, value_idx] + # error: Invalid index type "Tuple[slice, int]" for + # "Union[ExtensionArray, ndarray]"; expected type + # "Union[int, slice, ndarray]" + value_arr = value[:, value_idx] # type: ignore[index] self.arrays[mgr_idx] = value_arr return diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 5b4b710838ef8..6364816b9ab2d 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -843,8 +843,8 @@ def _list_of_dict_to_arrays( if columns is None: gen = (list(x.keys()) for x in data) sort = not any(isinstance(d, dict) for d in data) - columns = lib.fast_unique_multiple_list_gen(gen, sort=sort) - columns = ensure_index(columns) + pre_cols = lib.fast_unique_multiple_list_gen(gen, sort=sort) + columns = ensure_index(pre_cols) # assure that they are of the base dict class and not of derived # classes diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b688f1b4fea5f..549d4337dcf54 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1062,7 +1062,7 @@ def idelete(self, indexer) -> BlockManager: axes = [new_columns, self.axes[1]] return type(self)._simple_new(tuple(nbs), axes) - def iset(self, loc: Union[int, slice, np.ndarray], value): + def iset(self, loc: Union[int, slice, np.ndarray], value: ArrayLike): """ Set new item in-place. Does not consolidate. Adds new Block if not contained in the current set of items @@ -1073,6 +1073,7 @@ def iset(self, loc: Union[int, slice, np.ndarray], value): if self._blklocs is None and self.ndim > 1: self._rebuild_blknos_and_blklocs() + # Note: we exclude DTA/TDA here value_is_extension_type = is_extension_array_dtype(value) # categorical/sparse/datetimetz @@ -1429,7 +1430,7 @@ def _slice_take_blocks_ax0( return blocks - def _make_na_block(self, placement, fill_value=None): + def _make_na_block(self, placement: BlockPlacement, fill_value=None) -> Block: if fill_value is None: fill_value = np.nan diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 21c79588317df..2fd39588a3da6 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -888,7 +888,7 @@ def clean_reindex_fill_method(method): return clean_fill_method(method, allow_nearest=True) -def _interp_limit(invalid, fw_limit, bw_limit): +def _interp_limit(invalid: np.ndarray, fw_limit, bw_limit): """ Get indexers of values that won't be filled because they exceed the limits. diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index edf32bade0657..45f1faa637b85 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -56,21 +56,17 @@ def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None): dtype : Dtype, optional The dtype of the result array. """ - arr = self if dtype is None: dtype = np.dtype("object") if na_value is None: na_value = self._str_na_value - if not len(arr): + if not len(self): # error: Argument 1 to "ndarray" has incompatible type "int"; # expected "Sequence[int]" return np.ndarray(0, dtype=dtype) # type: ignore[arg-type] - if not isinstance(arr, np.ndarray): - # error: Incompatible types in assignment (expression has type "ndarray", - # variable has type "ObjectStringArrayMixin") - arr = np.asarray(arr, dtype=object) # type: ignore[assignment] + arr = np.asarray(self, dtype=object) mask = isna(arr) convert = not np.all(mask) try: diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index e11e74f16030c..35e5abe9ce4e7 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -330,17 +330,6 @@ def test_fillna_frame(self, data_missing): # Non-scalar "scalar" values. super().test_fillna_frame(data_missing) - def test_fillna_fill_other(self, data_missing): - # Same as the parent class test, but with PandasDtype for expected["B"] - # instead of equivalent numpy dtype - data = data_missing - result = pd.DataFrame({"A": data, "B": [np.nan] * len(data)}).fillna({"B": 0.0}) - - expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)}) - expected["B"] = expected["B"].astype(PandasDtype(expected["B"].dtype)) - - self.assert_frame_equal(result, expected) - class TestReshaping(BaseNumPyTests, base.BaseReshapingTests): @pytest.mark.skip(reason="Incorrect expected.")