diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 79ea7731466d4..90bd61c49d060 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1850,6 +1850,17 @@ def find_common_type(types: list[DtypeObj]) -> DtypeObj: return np.find_common_type(types, []) # type: ignore[arg-type] +def coerce_to_target_dtype(values, other): + """ + Coerce the values to a dtype compat for other. This will always + return values, possibly object dtype, and not raise. + """ + dtype, _ = infer_dtype_from(other, pandas_dtype=True) + new_dtype = find_common_type([values.dtype, dtype]) + + return astype_array_safe(values, new_dtype, copy=False) + + def construct_2d_arraylike_from_scalar( value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool ) -> np.ndarray: diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index fcd5cd0979252..d268501da007d 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -89,6 +89,7 @@ new_block, to_native_types, ) +from pandas.core.missing import fillna_array if TYPE_CHECKING: from pandas import Float64Index @@ -383,8 +384,8 @@ def shift(self: T, periods: int, axis: int, fill_value) -> T: ) def fillna(self: T, value, limit, inplace: bool, downcast) -> T: - return self.apply_with_block( - "fillna", value=value, limit=limit, inplace=inplace, downcast=downcast + return self.apply( + fillna_array, value=value, limit=limit, inplace=inplace, downcast=downcast ) def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 587b9593e58fc..7b3e11c577e14 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1410,6 +1410,19 @@ def interpolate( new_values = values.fillna(value=fill_value, method=method, limit=limit) return self.make_block_same_class(new_values) + def fillna( + self, value, limit=None, inplace: bool = False, downcast=None + ) -> list[Block]: + + res_values = missing.fillna_ea_array( + self.values.ravel(), value, limit=limit, inplace=inplace, downcast=downcast + ) + res_values = ensure_block_shape(res_values, self.ndim, self.shape) + + if res_values.dtype == object: + return [self.make_block(values=res_values)] + return [self.make_block_same_class(values=res_values)] + class ExtensionBlock(libinternals.Block, EABackedBlock): """ @@ -1609,12 +1622,6 @@ def getitem_block_index(self, slicer: slice) -> ExtensionBlock: new_values = self.values[slicer] return type(self)(new_values, self._mgr_locs, ndim=self.ndim) - def fillna( - self, value, limit=None, inplace: bool = False, downcast=None - ) -> list[Block]: - values = self.values.fillna(value=value, limit=limit) - return [self.make_block_same_class(values=values)] - def diff(self, n: int, axis: int = 1) -> list[Block]: if axis == 0 and n != 0: # n==0 case will be a no-op so let is fall through @@ -1785,21 +1792,6 @@ def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Blo new_values = values.shift(periods, fill_value=fill_value, axis=axis) return [self.make_block_same_class(new_values)] - def fillna( - self, value, limit=None, inplace: bool = False, downcast=None - ) -> list[Block]: - - if not self._can_hold_element(value) and self.dtype.kind != "m": - # We support filling a DatetimeTZ with a `value` whose timezone - # is different by coercing to object. - # TODO: don't special-case td64 - return self.coerce_to_target_dtype(value).fillna( - value, limit, inplace, downcast - ) - - new_values = self.values.fillna(value=value, limit=limit) - return [self.make_block_same_class(values=new_values)] - class DatetimeLikeBlock(NDArrayBackedExtensionBlock): """Block for datetime64[ns], timedelta64[ns].""" @@ -2059,7 +2051,9 @@ def extend_blocks(result, blocks=None) -> list[Block]: return blocks -def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: +def ensure_block_shape( + values: ArrayLike, ndim: int = 1, shape: tuple = (1, -1) +) -> ArrayLike: """ Reshape if possible to have values.ndim == ndim. """ @@ -2070,7 +2064,7 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: # block.shape is incorrect for "2D" ExtensionArrays # We can't, and don't need to, reshape. values = cast("np.ndarray | DatetimeArray | TimedeltaArray", values) - values = values.reshape(1, -1) + values = values.reshape(*shape) return values diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 8a3d892876b5c..0b232e1babe16 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -27,7 +27,13 @@ ) from pandas.compat._optional import import_optional_dependency -from pandas.core.dtypes.cast import infer_dtype_from +from pandas.core.dtypes.cast import ( + can_hold_element, + coerce_to_target_dtype, + infer_dtype_from, + maybe_downcast_to_dtype, + soft_convert_objects, +) from pandas.core.dtypes.common import ( is_array_like, is_numeric_v_string_like, @@ -39,6 +45,8 @@ na_value_for_dtype, ) +from pandas.core.construction import extract_array + if TYPE_CHECKING: from pandas import Index @@ -975,3 +983,85 @@ def _rolling_window(a: npt.NDArray[np.bool_], window: int) -> npt.NDArray[np.boo shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) strides = a.strides + (a.strides[-1],) return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) + + +def _can_hold_element(values, element: Any) -> bool: + """ + Expanded version of core.dtypes.cast.can_hold_element + """ + element = extract_array(element, extract_numpy=True) + return can_hold_element(values, element) + + +def fillna_ea_array(values, value, limit=None, inplace: bool = False, downcast=None): + """ + Fillna logic for ExtensionArrays. + + Dispatches to the EA.fillna method (in which case downcast is currently + ignored), except for datetime64 in which case fallback to object dtype + is currently allowed. + """ + if not _can_hold_element(values, value) and values.dtype.kind == "M": + # We support filling a DatetimeTZ with a `value` whose timezone + # is different by coercing to object. + # TODO: don't special-case td64 + values = values.astype(object) + return fillna_array(values, value, limit=limit, inplace=True, downcast=downcast) + + return values.fillna(value, limit=limit) + + +def fillna_array(values, value, limit=None, inplace: bool = False, downcast=None): + """ + Fillna logic for np.ndarray/ExtensionArray. + + This includes the logic for downcasting if needed. + """ + from pandas.core.array_algos.putmask import ( + putmask_inplace, + validate_putmask, + ) + from pandas.core.arrays import ExtensionArray + + # inplace = validate_bool_kwarg(inplace, "inplace") + + if isinstance(values, ExtensionArray): + return fillna_ea_array( + values, value, limit=limit, inplace=inplace, downcast=downcast + ) + + mask = isna(values) + mask, noop = validate_putmask(values, mask) + + if limit is not None: + limit = algos.validate_limit(None, limit=limit) + mask[mask.cumsum(values.ndim - 1) > limit] = False + + if values.dtype.kind in ["b", "i", "u"]: + # those dtypes can never hold NAs + if inplace: + return values + else: + return values.copy() + + if _can_hold_element(values, value): + values = values if inplace else values.copy() + putmask_inplace(values, mask, value) + + if values.dtype == np.dtype(object): + if downcast is None: + values = soft_convert_objects(values, datetime=True, numeric=False) + + if downcast is None and values.dtype.kind not in ["f", "m", "M"]: + downcast = "infer" + if downcast: + values = maybe_downcast_to_dtype(values, downcast) + return values + + if noop: + # we can't process the value, but nothing to do + return values if inplace else values.copy() + else: + values = coerce_to_target_dtype(values, value) + # bc we have already cast, inplace=True may avoid an extra copy + return fillna_array(values, value, limit=limit, inplace=True, downcast=None) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 45a3bf9b145b9..b5bdf6a70199c 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -1,8 +1,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - from pandas import ( Categorical, DataFrame, @@ -281,7 +279,6 @@ def test_fillna_dtype_conversion_equiv_replace(self, val): result = df.fillna(val) tm.assert_frame_equal(result, expected) - @td.skip_array_manager_invalid_test def test_fillna_datetime_columns(self): # GH#7095 df = DataFrame(