Skip to content

Commit 356ba63

Browse files
jbrockmendelluckyvs1
authored andcommitted
REF: implement array_algos.putmask (pandas-dev#38793)
1 parent be3f344 commit 356ba63

File tree

2 files changed

+128
-117
lines changed

2 files changed

+128
-117
lines changed

pandas/core/array_algos/putmask.py

+122
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
"""
2+
EA-compatible analogue to np.putmask
3+
"""
4+
from typing import Any
5+
import warnings
6+
7+
import numpy as np
8+
9+
from pandas._libs import lib
10+
from pandas._typing import ArrayLike
11+
12+
from pandas.core.dtypes.cast import convert_scalar_for_putitemlike, maybe_promote
13+
from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_list_like
14+
from pandas.core.dtypes.missing import isna_compat
15+
16+
17+
def putmask_inplace(values: ArrayLike, mask: np.ndarray, value: Any) -> None:
    """
    Set ``values[mask]`` from ``value`` in place; works for ExtensionArrays too.

    This mirrors np.putmask, the main difference being that repeating or
    truncating ``value`` the way numpy does is not handled here.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Modified in place.
    mask : np.ndarray[bool]
        We assume _extract_bool_array has already been called.
    value : Any
        Scalar or listlike.  A listlike whose length equals ``len(values)``
        is itself indexed with ``mask`` before being assigned.
    """
    is_ndarray = isinstance(values, np.ndarray)

    if is_ndarray and lib.is_scalar(value):
        value = convert_scalar_for_putitemlike(value, values.dtype)

    if is_ndarray and (values.dtype != object or lib.is_scalar(value)):
        # GH#37833 np.putmask is more performant than __setitem__
        np.putmask(values, mask, value)
        return

    # Remaining cases: non-ndarray ``values``, or an object-dtype ndarray
    #  receiving a non-scalar ``value``.
    # GH#19266 using np.putmask gives unexpected results with listlike value
    if is_list_like(value) and len(value) == len(values):
        value = value[mask]
    values[mask] = value
43+
44+
45+
def putmask_smart(values: np.ndarray, mask: np.ndarray, new) -> np.ndarray:
    """
    Return an ndarray with `new` put where `mask` is True, trying to
    preserve the dtype of `values` if possible.

    Parameters
    ----------
    values : np.ndarray
        Array to update.  Depending on the dtypes involved it is either
        modified in place and returned, or left untouched while an updated
        copy is returned.
    mask : np.ndarray[bool]
        Applies to both sides (array like).
    new : `new values` either scalar or an array like aligned with `values`

    Returns
    -------
    values : ndarray with updated values
        this *may* be a copy of the original

    See Also
    --------
    numpy.putmask
    """
    if not mask.any():
        # Nothing to set.  Returning early keeps the dtype of `values` and
        # avoids indexing into an empty selection (``nn[0]``) below.
        return values

    # we cannot use np.asarray() here as we cannot have conversions
    # that numpy does when numeric are mixed with strings

    # n should be the length of the mask or a scalar here
    if not is_list_like(new):
        new = np.repeat(new, len(mask))

    # see if we are only masking values that if putted
    # will work in the current dtype
    try:
        nn = new[mask]
    except TypeError:
        # TypeError: only integer scalar arrays can be converted to a scalar index
        pass
    else:
        can_compare = (
            # make sure that we have a nullable type if we have nulls
            isna_compat(values, nn[0])
            # only compare integers/floats
            and (is_float_dtype(nn.dtype) or is_integer_dtype(nn.dtype))
            and (is_float_dtype(values.dtype) or is_integer_dtype(values.dtype))
        )
        if can_compare:
            # we ignore ComplexWarning here
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("ignore", np.ComplexWarning)
                nn_at = nn.astype(values.dtype)

            # the new values survive the round trip through the existing
            # dtype unchanged, so no upcast is needed
            comp = nn == nn_at
            if is_list_like(comp) and comp.all():
                nv = values.copy()
                nv[mask] = nn_at
                return nv

    new = np.asarray(new)

    if values.dtype.kind == new.dtype.kind:
        # preserves dtype if possible
        return _putmask_preserve(values, new, mask)

    # change the dtype if needed
    dtype, _ = maybe_promote(new.dtype)

    values = values.astype(dtype)

    return _putmask_preserve(values, new, mask)
115+
116+
117+
def _putmask_preserve(new_values: np.ndarray, new, mask: np.ndarray):
118+
try:
119+
new_values[mask] = new[mask]
120+
except (IndexError, ValueError):
121+
new_values[mask] = new
122+
return new_values

pandas/core/internals/blocks.py

+6-117
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import inspect
22
import re
33
from typing import TYPE_CHECKING, Any, List, Optional, Type, Union, cast
4-
import warnings
54

65
import numpy as np
76

@@ -42,9 +41,7 @@
4241
is_dtype_equal,
4342
is_extension_array_dtype,
4443
is_float,
45-
is_float_dtype,
4644
is_integer,
47-
is_integer_dtype,
4845
is_list_like,
4946
is_object_dtype,
5047
is_re,
@@ -54,9 +51,10 @@
5451
)
5552
from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype
5653
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCPandasArray, ABCSeries
57-
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, isna_compat
54+
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
5855

5956
import pandas.core.algorithms as algos
57+
from pandas.core.array_algos.putmask import putmask_inplace, putmask_smart
6058
from pandas.core.array_algos.replace import compare_or_regex_search, replace_regex
6159
from pandas.core.array_algos.transforms import shift
6260
from pandas.core.arrays import (
@@ -437,7 +435,7 @@ def fillna(
437435

438436
if self._can_hold_element(value):
439437
nb = self if inplace else self.copy()
440-
nb._putmask_simple(mask, value)
438+
putmask_inplace(nb.values, mask, value)
441439
# TODO: should be nb._maybe_downcast?
442440
return self._maybe_downcast([nb], downcast)
443441

@@ -762,7 +760,7 @@ def replace(
762760
)
763761

764762
blk = self if inplace else self.copy()
765-
blk._putmask_simple(mask, value)
763+
putmask_inplace(blk.values, mask, value)
766764
blocks = blk.convert(numeric=False, copy=not inplace)
767765
return blocks
768766

@@ -991,35 +989,6 @@ def setitem(self, indexer, value):
991989
block = self.make_block(values)
992990
return block
993991

994-
def _putmask_simple(self, mask: np.ndarray, value: Any):
995-
"""
996-
Like putmask but
997-
998-
a) we do not cast on failure
999-
b) we do not handle repeating or truncating like numpy.
1000-
1001-
Parameters
1002-
----------
1003-
mask : np.ndarray[bool]
1004-
We assume _extract_bool_array has already been called.
1005-
value : Any
1006-
We assume self._can_hold_element(value)
1007-
"""
1008-
values = self.values
1009-
1010-
if lib.is_scalar(value) and isinstance(values, np.ndarray):
1011-
value = convert_scalar_for_putitemlike(value, values.dtype)
1012-
1013-
if self.is_extension or (self.is_object and not lib.is_scalar(value)):
1014-
# GH#19266 using np.putmask gives unexpected results with listlike value
1015-
if is_list_like(value) and len(value) == len(values):
1016-
values[mask] = value[mask]
1017-
else:
1018-
values[mask] = value
1019-
else:
1020-
# GH#37833 np.putmask is more performant than __setitem__
1021-
np.putmask(values, mask, value)
1022-
1023992
def putmask(self, mask, new, axis: int = 0) -> List["Block"]:
1024993
"""
1025994
putmask the data to the block; it is possible that we may create a
@@ -1121,7 +1090,7 @@ def f(mask, val, idx):
11211090
# we need to explicitly astype here to make a copy
11221091
n = n.astype(dtype)
11231092

1124-
nv = _putmask_smart(val, mask, n)
1093+
nv = putmask_smart(val, mask, n)
11251094
return nv
11261095

11271096
new_blocks = self.split_and_operate(mask, f, True)
@@ -1560,7 +1529,7 @@ def _replace_coerce(
15601529
nb = self.coerce_to_target_dtype(value)
15611530
if nb is self and not inplace:
15621531
nb = nb.copy()
1563-
nb._putmask_simple(mask, value)
1532+
putmask_inplace(nb.values, mask, value)
15641533
return [nb]
15651534
else:
15661535
regex = _should_use_regex(regex, to_replace)
@@ -2665,86 +2634,6 @@ def safe_reshape(arr, new_shape: Shape):
26652634
return arr
26662635

26672636

2668-
def _putmask_smart(v: np.ndarray, mask: np.ndarray, n) -> np.ndarray:
2669-
"""
2670-
Return a new ndarray, try to preserve dtype if possible.
2671-
2672-
Parameters
2673-
----------
2674-
v : np.ndarray
2675-
`values`, updated in-place.
2676-
mask : np.ndarray[bool]
2677-
Applies to both sides (array like).
2678-
n : `new values` either scalar or an array like aligned with `values`
2679-
2680-
Returns
2681-
-------
2682-
values : ndarray with updated values
2683-
this *may* be a copy of the original
2684-
2685-
See Also
2686-
--------
2687-
ndarray.putmask
2688-
"""
2689-
# we cannot use np.asarray() here as we cannot have conversions
2690-
# that numpy does when numeric are mixed with strings
2691-
2692-
# n should be the length of the mask or a scalar here
2693-
if not is_list_like(n):
2694-
n = np.repeat(n, len(mask))
2695-
2696-
# see if we are only masking values that if putted
2697-
# will work in the current dtype
2698-
try:
2699-
nn = n[mask]
2700-
except TypeError:
2701-
# TypeError: only integer scalar arrays can be converted to a scalar index
2702-
pass
2703-
else:
2704-
# make sure that we have a nullable type
2705-
# if we have nulls
2706-
if not isna_compat(v, nn[0]):
2707-
pass
2708-
elif not (is_float_dtype(nn.dtype) or is_integer_dtype(nn.dtype)):
2709-
# only compare integers/floats
2710-
pass
2711-
elif not (is_float_dtype(v.dtype) or is_integer_dtype(v.dtype)):
2712-
# only compare integers/floats
2713-
pass
2714-
else:
2715-
2716-
# we ignore ComplexWarning here
2717-
with warnings.catch_warnings(record=True):
2718-
warnings.simplefilter("ignore", np.ComplexWarning)
2719-
nn_at = nn.astype(v.dtype)
2720-
2721-
comp = nn == nn_at
2722-
if is_list_like(comp) and comp.all():
2723-
nv = v.copy()
2724-
nv[mask] = nn_at
2725-
return nv
2726-
2727-
n = np.asarray(n)
2728-
2729-
def _putmask_preserve(nv, n):
2730-
try:
2731-
nv[mask] = n[mask]
2732-
except (IndexError, ValueError):
2733-
nv[mask] = n
2734-
return nv
2735-
2736-
# preserves dtype if possible
2737-
if v.dtype.kind == n.dtype.kind:
2738-
return _putmask_preserve(v, n)
2739-
2740-
# change the dtype if needed
2741-
dtype, _ = maybe_promote(n.dtype)
2742-
2743-
v = v.astype(dtype)
2744-
2745-
return _putmask_preserve(v, n)
2746-
2747-
27482637
def _extract_bool_array(mask: ArrayLike) -> np.ndarray:
27492638
"""
27502639
If we have a SparseArray or BooleanArray, convert it to ndarray[bool].

0 commit comments

Comments
 (0)