Skip to content

Commit 7dfb279

Browse files
committed
Merge pull request #9743 from evanpw/issue_9731
BUG: where gives incorrect results when upcasting (GH 9731)
2 parents b246920 + f0ba9fd commit 7dfb279

File tree

3 files changed

+49
-39
lines changed

3 files changed

+49
-39
lines changed

doc/source/whatsnew/v0.16.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,4 @@ Bug Fixes
7878

7979

8080
- Bug in ``Series.quantile`` on empty Series of type ``Datetime`` or ``Timedelta`` (:issue:`9675`)
81+
- Bug in ``where`` causing incorrect results when upcasting was required (:issue:`9731`)

pandas/core/common.py

+40-39
Original file line numberDiff line numberDiff line change
@@ -1081,15 +1081,6 @@ def _infer_dtype_from_scalar(val):
10811081
return dtype, val
10821082

10831083

1084-
def _maybe_cast_scalar(dtype, value):
1085-
""" if we a scalar value and are casting to a dtype that needs nan -> NaT
1086-
conversion
1087-
"""
1088-
if np.isscalar(value) and dtype in _DATELIKE_DTYPES and isnull(value):
1089-
return tslib.iNaT
1090-
return value
1091-
1092-
10931084
def _maybe_promote(dtype, fill_value=np.nan):
10941085

10951086
# if we passed an array here, determine the fill value by dtype
@@ -1154,16 +1145,39 @@ def _maybe_promote(dtype, fill_value=np.nan):
11541145
return dtype, fill_value
11551146

11561147

1157-
def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None):
1158-
""" a safe version of put mask that (potentially upcasts the result
1159-
return the result
1160-
if change is not None, then MUTATE the change (and change the dtype)
1161-
return a changed flag
1148+
def _maybe_upcast_putmask(result, mask, other):
11621149
"""
1150+
A safe version of putmask that potentially upcasts the result
11631151
1164-
if mask.any():
1152+
Parameters
1153+
----------
1154+
result : ndarray
1155+
The destination array. This will be mutated in-place if no upcasting is
1156+
necessary.
1157+
mask : boolean ndarray
1158+
other : ndarray or scalar
1159+
The source array or value
11651160
1166-
other = _maybe_cast_scalar(result.dtype, other)
1161+
Returns
1162+
-------
1163+
result : ndarray
1164+
changed : boolean
1165+
Set to True if the result array was upcast
1166+
"""
1167+
1168+
if mask.any():
1169+
# Two conversions for date-like dtypes that can't be done automatically
1170+
# in np.place:
1171+
# NaN -> NaT
1172+
# integer or integer array -> date-like array
1173+
if result.dtype in _DATELIKE_DTYPES:
1174+
if lib.isscalar(other):
1175+
if isnull(other):
1176+
other = tslib.iNaT
1177+
elif is_integer(other):
1178+
other = np.array(other, dtype=result.dtype)
1179+
elif is_integer_dtype(other):
1180+
other = np.array(other, dtype=result.dtype)
11671181

11681182
def changeit():
11691183

@@ -1173,39 +1187,26 @@ def changeit():
11731187
om = other[mask]
11741188
om_at = om.astype(result.dtype)
11751189
if (om == om_at).all():
1176-
new_other = result.values.copy()
1177-
new_other[mask] = om_at
1178-
result[:] = new_other
1190+
new_result = result.values.copy()
1191+
new_result[mask] = om_at
1192+
result[:] = new_result
11791193
return result, False
11801194
except:
11811195
pass
11821196

11831197
# we are forced to change the dtype of the result as the input
11841198
# isn't compatible
1185-
r, fill_value = _maybe_upcast(
1186-
result, fill_value=other, dtype=dtype, copy=True)
1187-
np.putmask(r, mask, other)
1188-
1189-
# we need to actually change the dtype here
1190-
if change is not None:
1191-
1192-
# if we are trying to do something unsafe
1193-
# like put a bigger dtype in a smaller one, use the smaller one
1194-
# pragma: no cover
1195-
if change.dtype.itemsize < r.dtype.itemsize:
1196-
raise AssertionError(
1197-
"cannot change dtype of input to smaller size")
1198-
change.dtype = r.dtype
1199-
change[:] = r
1199+
r, _ = _maybe_upcast(result, fill_value=other, copy=True)
1200+
np.place(r, mask, other)
12001201

12011202
return r, True
12021203

1203-
# we want to decide whether putmask will work
1204+
# we want to decide whether place will work
12041205
# if we have nans in the False portion of our mask then we need to
1205-
# upcast (possibily) otherwise we DON't want to upcast (e.g. if we are
1206-
# have values, say integers in the success portion then its ok to not
1206+
# upcast (possibly), otherwise we DON'T want to upcast (e.g. if we
1207+
# have values, say integers, in the success portion then it's ok to not
12071208
# upcast)
1208-
new_dtype, fill_value = _maybe_promote(result.dtype, other)
1209+
new_dtype, _ = _maybe_promote(result.dtype, other)
12091210
if new_dtype != result.dtype:
12101211

12111212
# we have a scalar or len 0 ndarray
@@ -1222,7 +1223,7 @@ def changeit():
12221223
return changeit()
12231224

12241225
try:
1225-
np.putmask(result, mask, other)
1226+
np.place(result, mask, other)
12261227
except:
12271228
return changeit()
12281229

pandas/tests/test_series.py

+8
Original file line numberDiff line numberDiff line change
@@ -1688,6 +1688,14 @@ def test_where(self):
16881688
assert_series_equal(s, expected)
16891689
self.assertEqual(s.dtype, expected.dtype)
16901690

1691+
# GH 9731
1692+
s = Series(np.arange(10), dtype='int64')
1693+
mask = s > 5
1694+
values = [2.5, 3.5, 4.5, 5.5]
1695+
s[mask] = values
1696+
expected = Series(lrange(6) + values, dtype='float64')
1697+
assert_series_equal(s, expected)
1698+
16911699
# can't do these as we are forced to change the itemsize of the input
16921700
# to something we cannot
16931701
for dtype in [np.int8, np.int16, np.int32, np.float16, np.float32]:

0 commit comments

Comments
 (0)