diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index fe5b464a5a18d..216dd1e65de3a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -87,6 +87,7 @@ ) from pandas.core.dtypes.inference import is_list_like from pandas.core.dtypes.missing import ( + array_equivalent, is_valid_na_for_dtype, isna, na_value_for_dtype, @@ -1970,7 +1971,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: # in smaller int dtypes. info = np.iinfo(dtype) if info.min <= element <= info.max: - return element + return dtype.type(element) raise ValueError if tipo is not None: @@ -2026,6 +2027,15 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if element._hasna: raise ValueError return element + elif tipo.itemsize > dtype.itemsize: + if isinstance(element, np.ndarray): + # e.g. TestDataFrameIndexingWhere::test_where_alignment + casted = element.astype(dtype) + # TODO(np>=1.20): we can just use np.array_equal with equal_nan + if array_equivalent(casted, element): + return casted + raise ValueError + return element if lib.is_integer(element) or lib.is_float(element): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 77a0d7804d27b..60faae114ed07 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -38,8 +38,8 @@ from pandas.core.dtypes.cast import ( can_hold_element, find_result_type, - maybe_downcast_numeric, maybe_downcast_to_dtype, + np_can_hold_element, soft_convert_objects, ) from pandas.core.dtypes.common import ( @@ -1188,13 +1188,19 @@ def where(self, other, cond) -> list[Block]: other = self._standardize_fill_value(other) - if not self._can_hold_element(other): + try: + # try/except here is equivalent to a self._can_hold_element check, + # but this gets us back 'casted' which we will re-use below; + # without using 'casted', expressions.where may do unwanted upcasts. + casted = np_can_hold_element(values.dtype, other) + except (ValueError, TypeError): # we cannot coerce, return a compat dtype block = self.coerce_to_target_dtype(other) blocks = block.where(orig_other, cond) return self._maybe_downcast(blocks, "infer") else: + other = casted alt = setitem_datetimelike_compat(values, icond.sum(), other) if alt is not other: if is_list_like(other) and len(other) < len(values): @@ -1224,38 +1230,13 @@ def where(self, other, cond) -> list[Block]: # Note: expressions.where may upcast. result = expressions.where(~icond, values, other) + # The np_can_hold_element check _should_ ensure that we always + # have result.dtype == self.dtype here. - if self._can_hold_na or self.ndim == 1: - - if transpose: - result = result.T - - return [self.make_block(result)] - - # might need to separate out blocks - cond = ~icond - axis = cond.ndim - 1 - cond = cond.swapaxes(axis, 0) - mask = cond.all(axis=1) - - result_blocks: list[Block] = [] - for m in [mask, ~mask]: - if m.any(): - taken = result.take(m.nonzero()[0], axis=axis) - r = maybe_downcast_numeric(taken, self.dtype) - if r.dtype != taken.dtype: - warnings.warn( - "Downcasting integer-dtype results in .where is " - "deprecated and will change in a future version. " - "To retain the old behavior, explicitly cast the results " - "to the desired dtype.", - FutureWarning, - stacklevel=find_stack_level(), - ) - nb = self.make_block(r.T, placement=self._mgr_locs[m]) - result_blocks.append(nb) + if transpose: + result = result.T - return result_blocks + return [self.make_block(result)] def _unstack( self, diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 3e9bb6fca5558..750672c009f0c 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -141,11 +141,7 @@ def _check_align(df, cond, other, check_dtypes=True): # check other is ndarray cond = df > 0 - warn = None - if df is mixed_int_frame: - warn = FutureWarning - with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"): - _check_align(df, cond, (_safe_add(df).values)) + _check_align(df, cond, (_safe_add(df).values)) # integers are upcast, so don't check the dtypes cond = df > 0 @@ -469,44 +465,43 @@ def test_where_axis(self, using_array_manager): # GH 9736 df = DataFrame(np.random.randn(2, 2)) mask = DataFrame([[False, False], [False, False]]) - s = Series([0, 1]) + ser = Series([0, 1]) expected = DataFrame([[0, 0], [1, 1]], dtype="float64") - result = df.where(mask, s, axis="index") + result = df.where(mask, ser, axis="index") tm.assert_frame_equal(result, expected) result = df.copy() - return_value = result.where(mask, s, axis="index", inplace=True) + return_value = result.where(mask, ser, axis="index", inplace=True) assert return_value is None tm.assert_frame_equal(result, expected) expected = DataFrame([[0, 1], [0, 1]], dtype="float64") - result = df.where(mask, s, axis="columns") + result = df.where(mask, ser, axis="columns") tm.assert_frame_equal(result, expected) result = df.copy() - return_value = result.where(mask, s, axis="columns", inplace=True) + return_value = result.where(mask, ser, axis="columns", inplace=True) assert return_value is None tm.assert_frame_equal(result, expected) + def test_where_axis_with_upcast(self): # Upcast needed df = DataFrame([[1, 2], [3, 4]], dtype="int64") mask = DataFrame([[False, False], [False, False]]) - s = Series([0, np.nan]) + ser = Series([0, np.nan]) expected = DataFrame([[0, 0], [np.nan, np.nan]], dtype="float64") - result = df.where(mask, s, axis="index") + result = df.where(mask, ser, axis="index") tm.assert_frame_equal(result, expected) result = df.copy() - return_value = result.where(mask, s, axis="index", inplace=True) + return_value = result.where(mask, ser, axis="index", inplace=True) assert return_value is None tm.assert_frame_equal(result, expected) - warn = FutureWarning if using_array_manager else None expected = DataFrame([[0, np.nan], [0, np.nan]]) - with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"): - result = df.where(mask, s, axis="columns") + result = df.where(mask, ser, axis="columns") tm.assert_frame_equal(result, expected) expected = DataFrame( @@ -516,7 +511,7 @@ def test_where_axis(self, using_array_manager): } ) result = df.copy() - return_value = result.where(mask, s, axis="columns", inplace=True) + return_value = result.where(mask, ser, axis="columns", inplace=True) assert return_value is None tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_clip.py b/pandas/tests/frame/methods/test_clip.py index e692948c92a26..c851e65a7ad4f 100644 --- a/pandas/tests/frame/methods/test_clip.py +++ b/pandas/tests/frame/methods/test_clip.py @@ -136,7 +136,7 @@ def test_clip_against_unordered_columns(self): tm.assert_frame_equal(result_lower, expected_lower) tm.assert_frame_equal(result_lower_upper, expected_lower_upper) - def test_clip_with_na_args(self, float_frame, using_array_manager): + def test_clip_with_na_args(self, float_frame): """Should process np.nan argument as None""" # GH#17276 tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) @@ -151,9 +151,7 @@ def test_clip_with_na_args(self, float_frame, using_array_manager): ) tm.assert_frame_equal(result, expected) - warn = FutureWarning if using_array_manager else None - with tm.assert_produces_warning(warn, match="Downcasting integer-dtype"): - result = df.clip(lower=[4, 5, np.nan], axis=1) + result = df.clip(lower=[4, 5, np.nan], axis=1) expected = DataFrame( {"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [7, 8, 9]} )