Skip to content

BUG: where coerces numeric to str incorrectly #9283

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 18, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ Bug Fixes
- Bug in read_csv when using skiprows on a file with CR line endings with the c engine. (:issue:`9079`)
- isnull now detects ``NaT`` in PeriodIndex (:issue:`9129`)
- Bug in groupby ``.nth()`` with a multiple column groupby (:issue:`8979`)
- Bug where ``DataFrame.where`` and ``Series.where`` incorrectly coerced numerics to strings (:issue:`9280`)
- Bug where ``DataFrame.where`` and ``Series.where`` raised ``ValueError`` when a list-like of strings was passed (:issue:`9280`)

- Fixed division by zero error for ``Series.kurt()`` when all values are equal (:issue:`9197`)

Expand Down
15 changes: 14 additions & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import pandas.lib as lib
import pandas.tslib as tslib
from pandas import compat
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types

from pandas.core.config import get_option

Expand Down Expand Up @@ -1322,6 +1322,19 @@ def _possibly_downcast_to_dtype(result, dtype):
return result


def _maybe_convert_string_to_object(values):
    """
    Convert a string or a string-dtype ndarray to an object-dtype ndarray.

    This keeps numpy from handling the values as a fixed-width string
    dtype.

    Parameters
    ----------
    values : object
        Any value; only ``string_types`` instances and ndarrays with a
        bytes/unicode string dtype are converted.

    Returns
    -------
    A one-element object-dtype ndarray when ``values`` is a
    ``string_types`` instance, an object-dtype copy when ``values`` is a
    string-dtype ndarray, otherwise ``values`` itself, unchanged.
    """
    if isinstance(values, np.ndarray):
        # dtype.kind is 'S' for bytes strings and 'U' for unicode strings.
        # Testing the kind avoids the ``np.string_`` / ``np.unicode_``
        # aliases, which no longer exist in NumPy 2.0.
        if values.dtype.kind in ('S', 'U'):
            values = values.astype(object)
    elif isinstance(values, string_types):
        values = np.array([values], dtype=object)
    return values


def _lcd_dtypes(a_dtype, b_dtype):
""" return the lcd dtype to hold these types """

Expand Down
6 changes: 5 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3292,7 +3292,11 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
if self.ndim == 1:

# try to set the same dtype as ourselves
new_other = np.array(other, dtype=self.dtype)
try:
new_other = np.array(other, dtype=self.dtype)
except ValueError:
new_other = np.array(other)

if not (new_other == np.array(other)).all():
other = np.array(other)

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
ABCSparseSeries, _infer_dtype_from_scalar,
_is_null_datelike_scalar, _maybe_promote,
is_timedelta64_dtype, is_datetime64_dtype,
_possibly_infer_to_datetimelike, array_equivalent)
_possibly_infer_to_datetimelike, array_equivalent,
_maybe_convert_string_to_object)
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.indexing import (_maybe_convert_indices, _length_of_indexer)
from pandas.core.categorical import Categorical, _maybe_to_categorical, _is_categorical
Expand Down Expand Up @@ -1052,6 +1053,7 @@ def where(self, other, cond, align=True, raise_on_error=True,
values = values.T
is_transposed = not is_transposed

other = _maybe_convert_string_to_object(other)

# our where function
def func(c, v, o):
Expand Down
28 changes: 28 additions & 0 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,34 @@ def test_2d_datetime64(self):
tm.assert_almost_equal(result, expected)


class TestMaybe(tm.TestCase):
    """Tests for ``com._maybe_convert_string_to_object``."""

    def test_maybe_convert_string_to_array(self):
        # a scalar string becomes a one-element object array
        result = com._maybe_convert_string_to_object('x')
        tm.assert_numpy_array_equal(result, np.array(['x'], dtype=object))
        self.assertEqual(result.dtype, object)

        # a non-string scalar is expected back unchanged
        result = com._maybe_convert_string_to_object(1)
        self.assertEqual(result, 1)

        # str-dtype array
        arr = np.array(['x', 'y'], dtype=str)
        result = com._maybe_convert_string_to_object(arr)
        tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object))
        self.assertEqual(result.dtype, object)

        # unicode
        arr = np.array(['x', 'y']).astype('U')
        result = com._maybe_convert_string_to_object(arr)
        tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object))
        self.assertEqual(result.dtype, object)

        # object
        arr = np.array(['x', 2], dtype=object)
        result = com._maybe_convert_string_to_object(arr)
        tm.assert_numpy_array_equal(result, np.array(['x', 2], dtype=object))
        self.assertEqual(result.dtype, object)


if __name__ == '__main__':
    # Run this module's tests through nose with the flags listed below;
    # exit=False is passed so runmodule is asked not to exit the interpreter.
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)
22 changes: 22 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1886,6 +1886,28 @@ def test_ix_setitem(self):
self.assertEqual(self.series[d1], 4)
self.assertEqual(self.series[d2], 6)

def test_where_numeric_with_string(self):
    # GH 9280
    s = pd.Series([1, 2, 3])

    # The replacement may be a scalar string, a list of strings or a
    # string ndarray; each form must give the same mixed-type result.
    replacements = ('X', ['X', 'Y', 'Z'], np.array(['X', 'Y', 'Z']))
    for other in replacements:
        w = s.where(s > 1, other)

        # the first element fails the condition and takes the string
        self.assertTrue(isinstance(w[0], str))
        # the remaining elements keep their integer values
        self.assertTrue(isinstance(w[1], int))
        self.assertTrue(isinstance(w[2], int))
        self.assertTrue(w.dtype == 'object')

def test_setitem_boolean(self):
mask = self.series > self.series.median()

Expand Down