Skip to content

Commit adda113

Browse files
committed
Merge pull request #6354 from cpcloud/bool-block-fail-6353
BLD/TST: fix bool block failures when strings are passed to replace list
2 parents ab27073 + f2492e4 commit adda113

File tree

5 files changed

+60
-16
lines changed

5 files changed

+60
-16
lines changed

doc/source/missing_data.rst

+25
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,31 @@ You can also operate on the DataFrame in place
596596
597597
df.replace(1.5, nan, inplace=True)
598598
599+
.. warning::
600+
601+
When replacing multiple ``bool`` or ``datetime64`` objects, the first
602+
argument to ``replace`` (``to_replace``) must match the type of the value
603+
being replaced. For example,
604+
605+
.. code-block:: python
606+
607+
s = Series([True, False, True])
608+
s.replace({'a string': 'new value', True: False})
609+
610+
will raise a ``TypeError`` because one of the ``dict`` keys is not of the
611+
correct type for replacement.
612+
613+
However, when replacing a *single* object such as,
614+
615+
.. code-block:: python
616+
617+
s = Series([True, False, True])
618+
s.replace('a string', 'another string')
619+
620+
the original ``NDFrame`` object will be returned untouched. We're working on
621+
unifying this API, but for backwards compatibility reasons we cannot break
622+
the latter behavior. See :issue:`6354` for more details.
623+
599624
Missing data casting rules and indexing
600625
---------------------------------------
601626

pandas/core/common.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -359,20 +359,18 @@ def mask_missing(arr, values_to_mask):
359359
if mask is None:
360360
mask = arr == x
361361

362-
# if x is a string and mask is not, then we get a scalar
363-
# return value, which is not good
364-
if not isinstance(mask, np.ndarray):
365-
m = mask
366-
mask = np.empty(arr.shape, dtype=np.bool)
367-
mask.fill(m)
362+
# if x is a string and arr is not, then we get False and we must
363+
# expand the mask to size arr.shape
364+
if np.isscalar(mask):
365+
mask = np.zeros(arr.shape, dtype=bool)
368366
else:
369-
mask = mask | (arr == x)
367+
mask |= arr == x
370368

371369
if na_mask.any():
372370
if mask is None:
373371
mask = isnull(arr)
374372
else:
375-
mask = mask | isnull(arr)
373+
mask |= isnull(arr)
376374

377375
return mask
378376

pandas/core/internals.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import itertools
22
import re
3+
import operator
34
from datetime import datetime, timedelta
45
import copy
56
from collections import defaultdict
@@ -2453,7 +2454,8 @@ def replace_list(self, src_list, dest_list, inplace=False, regex=False):
24532454
def comp(s):
24542455
if isnull(s):
24552456
return isnull(values)
2456-
return values == getattr(s, 'asm8', s)
2457+
return _possibly_compare(values, getattr(s, 'asm8', s),
2458+
operator.eq)
24572459
masks = [comp(s) for i, s in enumerate(src_list)]
24582460

24592461
result_blocks = []
@@ -4153,3 +4155,20 @@ def _possibly_convert_to_indexer(loc):
41534155
elif isinstance(loc, slice):
41544156
loc = lrange(loc.start, loc.stop)
41554157
return loc
4158+
4159+
4160+
def _possibly_compare(a, b, op):
4161+
res = op(a, b)
4162+
is_a_array = isinstance(a, np.ndarray)
4163+
is_b_array = isinstance(b, np.ndarray)
4164+
if np.isscalar(res) and (is_a_array or is_b_array):
4165+
type_names = [type(a).__name__, type(b).__name__]
4166+
4167+
if is_a_array:
4168+
type_names[0] = 'ndarray(dtype=%s)' % a.dtype
4169+
4170+
if is_b_array:
4171+
type_names[1] = 'ndarray(dtype=%s)' % b.dtype
4172+
4173+
raise TypeError("Cannot compare types %r and %r" % tuple(type_names))
4174+
return res

pandas/tests/test_frame.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -8005,9 +8005,8 @@ def test_replace_bool_with_bool(self):
80058005

80068006
def test_replace_with_dict_with_bool_keys(self):
80078007
df = DataFrame({0: [True, False], 1: [False, True]})
8008-
result = df.replace({'asdf': 'asdb', True: 'yes'})
8009-
expected = DataFrame({0: ['yes', False], 1: [False, 'yes']})
8010-
tm.assert_frame_equal(expected, result)
8008+
with tm.assertRaisesRegexp(TypeError, 'Cannot compare types .+'):
8009+
df.replace({'asdf': 'asdb', True: 'yes'})
80118010

80128011
def test_combine_multiple_frames_dtypes(self):
80138012
from pandas import concat

pandas/tests/test_series.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -5270,7 +5270,11 @@ def test_replace(self):
52705270

52715271
# malformed
52725272
self.assertRaises(ValueError, ser.replace, [1, 2, 3], [np.nan, 0])
5273-
self.assertRaises(TypeError, ser.replace, range(1, 3), [np.nan, 0])
5273+
5274+
# make sure that we aren't just masking a TypeError because bools don't
5275+
# implement indexing
5276+
with tm.assertRaisesRegexp(TypeError, 'Cannot compare types .+'):
5277+
ser.replace([1, 2], [np.nan, 0])
52745278

52755279
ser = Series([0, 1, 2, 3, 4])
52765280
result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
@@ -5375,9 +5379,8 @@ def test_replace_bool_with_bool(self):
53755379

53765380
def test_replace_with_dict_with_bool_keys(self):
53775381
s = Series([True, False, True])
5378-
result = s.replace({'asdf': 'asdb', True: 'yes'})
5379-
expected = Series(['yes', False, 'yes'])
5380-
tm.assert_series_equal(expected, result)
5382+
with tm.assertRaisesRegexp(TypeError, 'Cannot compare types .+'):
5383+
s.replace({'asdf': 'asdb', True: 'yes'})
53815384

53825385
def test_asfreq(self):
53835386
ts = Series([0., 1., 2.], index=[datetime(2009, 10, 30),

0 commit comments

Comments
 (0)