Skip to content

Commit d85ad59

Browse files
committed
COMPAT: invalid casting to nan
closes #12303
1 parent 59580b6 commit d85ad59

File tree

4 files changed

+45
-9
lines changed

4 files changed

+45
-9
lines changed

pandas/core/common.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -1098,6 +1098,33 @@ def _infer_dtype_from_scalar(val):
10981098
return dtype, val
10991099

11001100

1101+
def _is_na_compat(arr, fill_value=np.nan):
    """
    Check whether ``arr`` can be filled with ``fill_value``.

    Parameters
    ----------
    arr : a numpy array
    fill_value : fill value, default to np.nan

    Returns
    -------
    bool
        False when ``fill_value`` is null and ``arr`` has a boolean or
        integer dtype; True in every other case.
    """
    # read the dtype up front so an input without ``.dtype`` fails here
    arr_dtype = arr.dtype

    # a non-null fill value is always accepted
    if not isnull(fill_value):
        return True

    # a null fill value is rejected for boolean and integer dtypes
    if is_bool_dtype(arr_dtype):
        return False
    return not is_integer_dtype(arr_dtype)
1117+
1118+
1119+
def _maybe_fill(arr, fill_value=np.nan):
    """
    Fill ``arr`` with ``fill_value`` when the two are compatible.

    Parameters
    ----------
    arr : a numpy array
    fill_value : fill value, default to np.nan

    Returns
    -------
    arr, after calling ``arr.fill(fill_value)`` if ``_is_na_compat``
    accepted the fill value, and left untouched otherwise
    """
    can_fill = _is_na_compat(arr, fill_value)
    if not can_fill:
        # incompatible dtype / fill value pair: hand the array back as-is
        return arr

    arr.fill(fill_value)
    return arr
1126+
1127+
11011128
def _maybe_promote(dtype, fill_value=np.nan):
11021129

11031130
# if we passed an array here, determine the fill value by dtype
@@ -1359,7 +1386,10 @@ def trans(x): # noqa
13591386
# do a test on the first element, if it fails then we are done
13601387
r = result.ravel()
13611388
arr = np.array([r[0]])
1362-
if not np.allclose(arr, trans(arr).astype(dtype)):
1389+
1390+
# if we have any nulls, then we are done
1391+
if isnull(arr).any() or not np.allclose(arr,
1392+
trans(arr).astype(dtype)):
13631393
return result
13641394

13651395
# a comparable, e.g. a Decimal may slip in here

pandas/core/groupby.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@
3131
is_timedelta64_dtype, is_datetime64_dtype,
3232
is_categorical_dtype, _values_from_object,
3333
is_datetime_or_timedelta_dtype, is_bool,
34-
is_bool_dtype, AbstractMethodError)
34+
is_bool_dtype, AbstractMethodError,
35+
_maybe_fill)
3536
from pandas.core.config import option_context
3637
import pandas.lib as lib
3738
from pandas.lib import Timestamp
@@ -1725,14 +1726,15 @@ def _cython_operation(self, kind, values, how, axis):
17251726
labels, _, _ = self.group_info
17261727

17271728
if kind == 'aggregate':
1728-
result = np.empty(out_shape, dtype=out_dtype)
1729-
result.fill(np.nan)
1729+
result = _maybe_fill(np.empty(out_shape, dtype=out_dtype),
1730+
fill_value=np.nan)
17301731
counts = np.zeros(self.ngroups, dtype=np.int64)
17311732
result = self._aggregate(
17321733
result, counts, values, labels, func, is_numeric)
17331734
elif kind == 'transform':
1734-
result = np.empty_like(values, dtype=out_dtype)
1735-
result.fill(np.nan)
1735+
result = _maybe_fill(np.empty_like(values, dtype=out_dtype),
1736+
fill_value=np.nan)
1737+
17361738
# temporary storage for running-total type transforms
17371739
accum = np.empty(out_shape, dtype=out_dtype)
17381740
result = self._transform(

pandas/core/internals.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
is_dtype_equal, is_null_datelike_scalar,
1515
_maybe_promote, is_timedelta64_dtype,
1616
is_datetime64_dtype, is_datetimetz, is_sparse,
17-
array_equivalent,
17+
array_equivalent, _is_na_compat,
1818
_maybe_convert_string_to_object,
1919
is_categorical, is_datetimelike_v_numeric,
2020
is_numeric_v_string_like, is_internal_type)
@@ -4392,7 +4392,6 @@ def _putmask_smart(v, m, n):
43924392
m : `mask`, applies to both sides (array like)
43934393
n : `new values` either scalar or an array like aligned with `values`
43944394
"""
4395-
43964395
# n should be the length of the mask or a scalar here
43974396
if not is_list_like(n):
43984397
n = np.array([n] * len(m))
@@ -4403,6 +4402,12 @@ def _putmask_smart(v, m, n):
44034402
# will work in the current dtype
44044403
try:
44054404
nn = n[m]
4405+
4406+
# make sure that we have a nullable type
4407+
# if we have nulls
4408+
if not _is_na_compat(v, nn[0]):
4409+
raise ValueError
4410+
44064411
nn_at = nn.astype(v.dtype)
44074412

44084413
# avoid invalid dtype comparisons

pandas/tests/test_groupby.py

-1
Original file line numberDiff line numberDiff line change
@@ -5774,7 +5774,6 @@ def test_cython_group_transform_algos(self):
57745774
data = np.array([np.timedelta64(1, 'ns')] * 5, dtype='m8[ns]')[:, None]
57755775
accum = np.array([[0]], dtype='int64')
57765776
actual = np.zeros_like(data, dtype='int64')
5777-
actual.fill(np.nan)
57785777
pd.algos.group_cumsum(actual, data.view('int64'), labels, accum)
57795778
expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64(
57805779
2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'),

0 commit comments

Comments
 (0)