Skip to content

Commit b3e3add

Browse files
committed
Merge pull request #9283 from sinhrks/where_dtype
BUG: where coerces numeric to str incorrectly
2 parents fc2ec85 + f677011 commit b3e3add

File tree

6 files changed

+74
-3
lines changed

6 files changed

+74
-3
lines changed

doc/source/whatsnew/v0.16.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,8 @@ Bug Fixes
187187
- Bug in read_csv when using skiprows on a file with CR line endings with the c engine. (:issue:`9079`)
188188
- isnull now detects ``NaT`` in PeriodIndex (:issue:`9129`)
189189
- Bug in groupby ``.nth()`` with a multiple column groupby (:issue:`8979`)
190+
- Bug where ``DataFrame.where`` and ``Series.where`` incorrectly coerced numerics to strings (:issue:`9280`)
191+
- Bug where ``DataFrame.where`` and ``Series.where`` raised ``ValueError`` when a string list-like was passed. (:issue:`9280`)
190192

191193
- Fixed division by zero error for ``Series.kurt()`` when all values are equal (:issue:`9197`)
192194

pandas/core/common.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import pandas.lib as lib
2020
import pandas.tslib as tslib
2121
from pandas import compat
22-
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map
22+
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types
2323

2424
from pandas.core.config import get_option
2525

@@ -1322,6 +1322,19 @@ def _possibly_downcast_to_dtype(result, dtype):
13221322
return result
13231323

13241324

1325+
def _maybe_convert_string_to_object(values):
1326+
"""
1327+
Convert string-like and string-like array to convert object dtype.
1328+
This is to avoid numpy to handle the array as str dtype.
1329+
"""
1330+
if isinstance(values, string_types):
1331+
values = np.array([values], dtype=object)
1332+
elif (isinstance(values, np.ndarray) and
1333+
issubclass(values.dtype.type, (np.string_, np.unicode_))):
1334+
values = values.astype(object)
1335+
return values
1336+
1337+
13251338
def _lcd_dtypes(a_dtype, b_dtype):
13261339
""" return the lcd dtype to hold these types """
13271340

pandas/core/generic.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -3292,7 +3292,11 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
32923292
if self.ndim == 1:
32933293

32943294
# try to set the same dtype as ourselves
3295-
new_other = np.array(other, dtype=self.dtype)
3295+
try:
3296+
new_other = np.array(other, dtype=self.dtype)
3297+
except ValueError:
3298+
new_other = np.array(other)
3299+
32963300
if not (new_other == np.array(other)).all():
32973301
other = np.array(other)
32983302

pandas/core/internals.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
ABCSparseSeries, _infer_dtype_from_scalar,
1414
_is_null_datelike_scalar, _maybe_promote,
1515
is_timedelta64_dtype, is_datetime64_dtype,
16-
_possibly_infer_to_datetimelike, array_equivalent)
16+
_possibly_infer_to_datetimelike, array_equivalent,
17+
_maybe_convert_string_to_object)
1718
from pandas.core.index import Index, MultiIndex, _ensure_index
1819
from pandas.core.indexing import (_maybe_convert_indices, _length_of_indexer)
1920
from pandas.core.categorical import Categorical, _maybe_to_categorical, _is_categorical
@@ -1052,6 +1053,7 @@ def where(self, other, cond, align=True, raise_on_error=True,
10521053
values = values.T
10531054
is_transposed = not is_transposed
10541055

1056+
other = _maybe_convert_string_to_object(other)
10551057

10561058
# our where function
10571059
def func(c, v, o):

pandas/tests/test_common.py

+28
Original file line numberDiff line numberDiff line change
@@ -947,6 +947,34 @@ def test_2d_datetime64(self):
947947
tm.assert_almost_equal(result, expected)
948948

949949

950+
class TestMaybe(tm.TestCase):
    """Tests for ``com._maybe_convert_string_to_object``."""

    def test_maybe_convert_string_to_array(self):
        # a bare string is wrapped into a length-1 object array
        result = com._maybe_convert_string_to_object('x')
        tm.assert_numpy_array_equal(result, np.array(['x'], dtype=object))
        self.assertEqual(result.dtype, object)

        # a non-string scalar comes back as-is
        result = com._maybe_convert_string_to_object(1)
        self.assertEqual(result, 1)

        # str-dtype array is converted to object dtype
        arr = np.array(['x', 'y'], dtype=str)
        result = com._maybe_convert_string_to_object(arr)
        tm.assert_numpy_array_equal(result,
                                    np.array(['x', 'y'], dtype=object))
        self.assertEqual(result.dtype, object)

        # unicode
        arr = np.array(['x', 'y']).astype('U')
        result = com._maybe_convert_string_to_object(arr)
        tm.assert_numpy_array_equal(result,
                                    np.array(['x', 'y'], dtype=object))
        self.assertEqual(result.dtype, object)

        # object
        arr = np.array(['x', 2], dtype=object)
        result = com._maybe_convert_string_to_object(arr)
        tm.assert_numpy_array_equal(result,
                                    np.array(['x', 2], dtype=object))
        self.assertEqual(result.dtype, object)
977+
950978
if __name__ == '__main__':
951979
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
952980
exit=False)

pandas/tests/test_series.py

+22
Original file line numberDiff line numberDiff line change
@@ -1886,6 +1886,28 @@ def test_ix_setitem(self):
18861886
self.assertEqual(self.series[d1], 4)
18871887
self.assertEqual(self.series[d2], 6)
18881888

1889+
def test_where_numeric_with_string(self):
    # GH 9280
    # ``where`` on a numeric Series with string replacements should give an
    # object-dtype result: replaced slots hold str, kept slots stay int.
    s = pd.Series([1, 2, 3])

    # the same expectations apply to scalar, list and ndarray replacements
    others = ['X', ['X', 'Y', 'Z'], np.array(['X', 'Y', 'Z'])]
    for other in others:
        w = s.where(s > 1, other)

        # identify the failing replacement kind in assertion output
        msg = 'other=%r' % (other,)
        self.assertTrue(isinstance(w[0], str), msg)
        self.assertTrue(isinstance(w[1], int), msg)
        self.assertTrue(isinstance(w[2], int), msg)
        self.assertEqual(w.dtype, 'object', msg)
1910+
18891911
def test_setitem_boolean(self):
18901912
mask = self.series > self.series.median()
18911913

0 commit comments

Comments
 (0)