From ff43cefc8d258a432e48471fdc2ad3d34c9ed5f1 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Dec 2020 14:21:07 -0800 Subject: [PATCH] REF: move putmask internals in array_algos.putmask --- pandas/core/array_algos/putmask.py | 34 ++++++++++++++++++++++++++ pandas/core/internals/blocks.py | 39 +++++------------------------- 2 files changed, 40 insertions(+), 33 deletions(-) diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py index 32c84b6eb234f..2a1b6f784a1f2 100644 --- a/pandas/core/array_algos/putmask.py +++ b/pandas/core/array_algos/putmask.py @@ -120,3 +120,37 @@ def _putmask_preserve(new_values: np.ndarray, new, mask: np.ndarray): except (IndexError, ValueError): new_values[mask] = new return new_values + + +def putmask_without_repeat(values: np.ndarray, mask: np.ndarray, new: Any) -> None: + """ + np.putmask will truncate or repeat if `new` is a listlike with + len(new) != len(values). We require an exact match. + + Parameters + ---------- + values : np.ndarray + mask : np.ndarray[bool] + new : Any + """ + if getattr(new, "ndim", 0) >= 1: + new = new.astype(values.dtype, copy=False) + + # TODO: this prob needs some better checking for 2D cases + nlocs = mask.sum() + if nlocs > 0 and is_list_like(new) and getattr(new, "ndim", 1) == 1: + if nlocs == len(new): + # GH#30567 + # If length of ``new`` is less than the length of ``values``, + # `np.putmask` would first repeat the ``new`` array and then + # assign the masked values hence produces incorrect result. + # `np.place` on the other hand uses the ``new`` values as it is + # to place in the masked locations of ``values`` + np.place(values, mask, new) + # i.e. 
values[mask] = new + elif mask.shape[-1] == len(new) or len(new) == 1: + np.putmask(values, mask, new) + else: + raise ValueError("cannot assign mismatch length to masked array") + else: + np.putmask(values, mask, new) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 76b30dc17711e..38976ee632419 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -54,7 +54,11 @@ from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna import pandas.core.algorithms as algos -from pandas.core.array_algos.putmask import putmask_inplace, putmask_smart +from pandas.core.array_algos.putmask import ( + putmask_inplace, + putmask_smart, + putmask_without_repeat, +) from pandas.core.array_algos.replace import compare_or_regex_search, replace_regex from pandas.core.array_algos.transforms import shift from pandas.core.arrays import ( @@ -1030,38 +1034,7 @@ def putmask(self, mask, new, axis: int = 0) -> List["Block"]: if transpose: new_values = new_values.T - # If the default repeat behavior in np.putmask would go in the - # wrong direction, then explicitly repeat and reshape new instead - if getattr(new, "ndim", 0) >= 1: - new = new.astype(new_values.dtype, copy=False) - - # we require exact matches between the len of the - # values we are setting (or is compat). np.putmask - # doesn't check this and will simply truncate / pad - # the output, but we want sane error messages - # - # TODO: this prob needs some better checking - # for 2D cases - if ( - is_list_like(new) - and np.any(mask[mask]) - and getattr(new, "ndim", 1) == 1 - ): - if mask[mask].shape[-1] == len(new): - # GH 30567 - # If length of ``new`` is less than the length of ``new_values``, - # `np.putmask` would first repeat the ``new`` array and then - # assign the masked values hence produces incorrect result. 
- # `np.place` on the other hand uses the ``new`` values at it is - # to place in the masked locations of ``new_values`` - np.place(new_values, mask, new) - # i.e. new_values[mask] = new - elif mask.shape[-1] == len(new) or len(new) == 1: - np.putmask(new_values, mask, new) - else: - raise ValueError("cannot assign mismatch length to masked array") - else: - np.putmask(new_values, mask, new) + putmask_without_repeat(new_values, mask, new) # maybe upcast me elif mask.any():