Skip to content

Commit d7e2785

Browse files
committed
BUG: where gives incorrect results when upcasting (GH 9731)
1 parent 8d2818e commit d7e2785

File tree

3 files changed

+54
-34
lines changed

3 files changed

+54
-34
lines changed

doc/source/whatsnew/v0.16.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,4 @@ Bug Fixes
6464

6565

6666
- Bug in ``Series.quantile`` on empty Series of type ``Datetime`` or ``Timedelta`` (:issue:`9675`)
67+
- Bug in ``where`` causing incorrect results when upcasting was required (:issue:`9731`)

pandas/core/common.py

+45-34
Original file line numberDiff line numberDiff line change
@@ -1081,12 +1081,23 @@ def _infer_dtype_from_scalar(val):
10811081
return dtype, val
10821082

10831083

1084-
def _maybe_cast_scalar(dtype, value):
1085-
""" if we a scalar value and are casting to a dtype that needs nan -> NaT
1086-
conversion
1084+
def _maybe_cast(dtype, value):
10871085
"""
1088-
if np.isscalar(value) and dtype in _DATELIKE_DTYPES and isnull(value):
1089-
return tslib.iNaT
1086+
If `dtype` is date-like, then:
1087+
if `value` == nan, then convert to NaT
1088+
if `value` is an integer or integer array, convert to `dtype`
1089+
"""
1090+
if dtype in _DATELIKE_DTYPES:
1091+
if np.isscalar(value):
1092+
if isnull(value):
1093+
return tslib.iNaT
1094+
elif is_integer(value):
1095+
return np.array(value, dtype=dtype)
1096+
1097+
elif isinstance(value, np.ndarray):
1098+
if issubclass(dtype.type, np.integer):
1099+
return np.array(value, dtype=dtype)
1100+
10901101
return value
10911102

10921103

@@ -1154,16 +1165,29 @@ def _maybe_promote(dtype, fill_value=np.nan):
11541165
return dtype, fill_value
11551166

11561167

1157-
def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None):
1158-
""" a safe version of put mask that (potentially upcasts the result
1159-
return the result
1160-
if change is not None, then MUTATE the change (and change the dtype)
1161-
return a changed flag
1168+
def _maybe_upcast_putmask(result, mask, other):
1169+
"""
1170+
A safe version of putmask that potentially upcasts the result
1171+
1172+
Parameters
1173+
----------
1174+
result : ndarray
1175+
The destination array. This will be mutated in-place if no upcasting is
1176+
necessary.
1177+
mask : boolean ndarray
1178+
other : ndarray or scalar
1179+
The source array or value
1180+
1181+
Returns
1182+
-------
1183+
result : ndarray
1184+
changed : boolean
1185+
Set to True if the result array was upcast
11621186
"""
11631187

11641188
if mask.any():
11651189

1166-
other = _maybe_cast_scalar(result.dtype, other)
1190+
other = _maybe_cast(result.dtype, other)
11671191

11681192
def changeit():
11691193

@@ -1173,39 +1197,26 @@ def changeit():
11731197
om = other[mask]
11741198
om_at = om.astype(result.dtype)
11751199
if (om == om_at).all():
1176-
new_other = result.values.copy()
1177-
new_other[mask] = om_at
1178-
result[:] = new_other
1200+
new_result = result.values.copy()
1201+
new_result[mask] = om_at
1202+
result[:] = new_result
11791203
return result, False
11801204
except:
11811205
pass
11821206

11831207
# we are forced to change the dtype of the result as the input
11841208
# isn't compatible
1185-
r, fill_value = _maybe_upcast(
1186-
result, fill_value=other, dtype=dtype, copy=True)
1187-
np.putmask(r, mask, other)
1188-
1189-
# we need to actually change the dtype here
1190-
if change is not None:
1191-
1192-
# if we are trying to do something unsafe
1193-
# like put a bigger dtype in a smaller one, use the smaller one
1194-
# pragma: no cover
1195-
if change.dtype.itemsize < r.dtype.itemsize:
1196-
raise AssertionError(
1197-
"cannot change dtype of input to smaller size")
1198-
change.dtype = r.dtype
1199-
change[:] = r
1209+
r, _ = _maybe_upcast(result, fill_value=other, copy=True)
1210+
np.place(r, mask, other)
12001211

12011212
return r, True
12021213

1203-
# we want to decide whether putmask will work
1214+
# we want to decide whether place will work
12041215
# if we have nans in the False portion of our mask then we need to
1205-
# upcast (possibily) otherwise we DON't want to upcast (e.g. if we are
1206-
# have values, say integers in the success portion then its ok to not
1216+
# upcast (possibly), otherwise we DON'T want to upcast (e.g. if we
1217+
# have values, say integers, in the success portion then it's ok to not
12071218
# upcast)
1208-
new_dtype, fill_value = _maybe_promote(result.dtype, other)
1219+
new_dtype, _ = _maybe_promote(result.dtype, other)
12091220
if new_dtype != result.dtype:
12101221

12111222
# we have a scalar or len 0 ndarray
@@ -1222,7 +1233,7 @@ def changeit():
12221233
return changeit()
12231234

12241235
try:
1225-
np.putmask(result, mask, other)
1236+
np.place(result, mask, other)
12261237
except:
12271238
return changeit()
12281239

pandas/tests/test_series.py

+8
Original file line numberDiff line numberDiff line change
@@ -1688,6 +1688,14 @@ def test_where(self):
16881688
assert_series_equal(s, expected)
16891689
self.assertEqual(s.dtype, expected.dtype)
16901690

1691+
# GH 9731
1692+
s = Series(np.arange(10))
1693+
mask = s > 5
1694+
values = [2.5, 3.5, 4.5, 5.5]
1695+
s[mask] = values
1696+
expected = Series(lrange(6) + values, dtype='float64')
1697+
assert_series_equal(s, expected)
1698+
16911699
# can't do these as we are forced to change the itemsize of the input
16921700
# to something we cannot
16931701
for dtype in [np.int8, np.int16, np.int32, np.float16, np.float32]:

0 commit comments

Comments
 (0)