From f677011d4396f4cb078c76b5dc90e285571ada83 Mon Sep 17 00:00:00 2001
From: sinhrks <sinhrks@gmail.com>
Date: Sat, 17 Jan 2015 16:32:54 +0900
Subject: [PATCH] BUG: where coerces numeric to str incorrectly

---
 doc/source/whatsnew/v0.16.0.txt |  2 ++
 pandas/core/common.py           | 15 ++++++++++++++-
 pandas/core/generic.py          |  6 +++++-
 pandas/core/internals.py        |  4 +++-
 pandas/tests/test_common.py     | 28 ++++++++++++++++++++++++++++
 pandas/tests/test_series.py     | 22 ++++++++++++++++++++++
 6 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt
index 2db455272363b..d8fc10dd54e8c 100644
--- a/doc/source/whatsnew/v0.16.0.txt
+++ b/doc/source/whatsnew/v0.16.0.txt
@@ -187,6 +187,8 @@ Bug Fixes
 - Bug in read_csv when using skiprows on a file with CR line endings with the c engine. (:issue:`9079`)
 - isnull now detects ``NaT`` in PeriodIndex (:issue:`9129`)
 - Bug in groupby ``.nth()`` with a multiple column groupby (:issue:`8979`)
+- Bug in ``DataFrame.where`` and ``Series.where`` incorrectly coercing numerics to strings (:issue:`9280`)
+- Bug in ``DataFrame.where`` and ``Series.where`` raising ``ValueError`` when a list-like of strings is passed (:issue:`9280`)
 
 - Fixed division by zero error for ``Series.kurt()`` when all values are equal (:issue:`9197`)
 
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 143f65ee64e60..f8f5928ca7d51 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -19,7 +19,7 @@
 import pandas.lib as lib
 import pandas.tslib as tslib
 from pandas import compat
-from pandas.compat import StringIO, BytesIO, range, long, u, zip, map
+from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types
 
 from pandas.core.config import get_option
 
@@ -1322,6 +1322,19 @@ def _possibly_downcast_to_dtype(result, dtype):
     return result
 
 
+def _maybe_convert_string_to_object(values):
+    """
+    Wrap a string scalar in a 1-element object array and cast str/unicode
+    ndarrays to object dtype, so numpy does not handle them as str dtype.
+    """
+    if isinstance(values, string_types):
+        values = np.array([values], dtype=object)
+    elif (isinstance(values, np.ndarray) and
+        issubclass(values.dtype.type, (np.string_, np.unicode_))):
+        values = values.astype(object)
+    return values  # any other input is passed through unchanged
+
+
 def _lcd_dtypes(a_dtype, b_dtype):
     """ return the lcd dtype to hold these types """
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 7fa64e0b4ca91..b2adfae744db7 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3292,7 +3292,11 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
             if self.ndim == 1:
 
                 # try to set the same dtype as ourselves
-                new_other = np.array(other, dtype=self.dtype)
+                try:
+                    new_other = np.array(other, dtype=self.dtype)
+                except ValueError:
+                    new_other = np.array(other)
+
                 if not (new_other == np.array(other)).all():
                     other = np.array(other)
 
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index ef33e27d861fd..f4abe05097cff 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -13,7 +13,8 @@
                                 ABCSparseSeries, _infer_dtype_from_scalar,
                                 _is_null_datelike_scalar, _maybe_promote,
                                 is_timedelta64_dtype, is_datetime64_dtype,
-                                _possibly_infer_to_datetimelike, array_equivalent)
+                                _possibly_infer_to_datetimelike, array_equivalent,
+                                _maybe_convert_string_to_object)
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import (_maybe_convert_indices, _length_of_indexer)
 from pandas.core.categorical import Categorical, _maybe_to_categorical, _is_categorical
@@ -1052,6 +1053,7 @@ def where(self, other, cond, align=True, raise_on_error=True,
                 values = values.T
                 is_transposed = not is_transposed
 
+        other = _maybe_convert_string_to_object(other)
 
         # our where function
         def func(c, v, o):
diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
index 2f57fa593bc40..36d6c39586d97 100644
--- a/pandas/tests/test_common.py
+++ b/pandas/tests/test_common.py
@@ -947,6 +947,34 @@ def test_2d_datetime64(self):
         tm.assert_almost_equal(result, expected)
 
 
+class TestMaybe(tm.TestCase):
+    # Tests for com._maybe_convert_string_to_object.
+    def test_maybe_convert_string_to_object(self):
+        result = com._maybe_convert_string_to_object('x')  # str scalar
+        tm.assert_numpy_array_equal(result, np.array(['x'], dtype=object))
+        self.assertTrue(result.dtype == object)
+
+        result = com._maybe_convert_string_to_object(1)  # non-str scalar
+        self.assertEqual(result, 1)
+
+        arr = np.array(['x', 'y'], dtype=str)  # str-dtype ndarray
+        result = com._maybe_convert_string_to_object(arr)
+        tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object))
+        self.assertTrue(result.dtype == object)
+
+        # unicode-dtype ndarray is converted to object dtype as well
+        arr = np.array(['x', 'y']).astype('U')
+        result = com._maybe_convert_string_to_object(arr)
+        tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object))
+        self.assertTrue(result.dtype == object)
+
+        # object-dtype ndarray comes back equal and still object dtype
+        arr = np.array(['x', 2], dtype=object)
+        result = com._maybe_convert_string_to_object(arr)
+        tm.assert_numpy_array_equal(result, np.array(['x', 2], dtype=object))
+        self.assertTrue(result.dtype == object)
+
+
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index b67a8c5de1c2d..a5de26da1606a 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -1886,6 +1886,28 @@ def test_ix_setitem(self):
         self.assertEqual(self.series[d1], 4)
         self.assertEqual(self.series[d2], 6)
 
+    def test_where_numeric_with_string(self):
+        # GH 9280: a string ``other`` must not coerce the kept ints to str
+        s = pd.Series([1, 2, 3])
+        w = s.where(s>1, 'X')  # scalar str
+
+        self.assertTrue(isinstance(w[0], str))
+        self.assertTrue(isinstance(w[1], int))
+        self.assertTrue(isinstance(w[2], int))
+        self.assertTrue(w.dtype == 'object')
+
+        w = s.where(s>1, ['X', 'Y', 'Z'])  # list of str
+        self.assertTrue(isinstance(w[0], str))
+        self.assertTrue(isinstance(w[1], int))
+        self.assertTrue(isinstance(w[2], int))
+        self.assertTrue(w.dtype == 'object')
+
+        w = s.where(s>1, np.array(['X', 'Y', 'Z']))  # ndarray of str
+        self.assertTrue(isinstance(w[0], str))
+        self.assertTrue(isinstance(w[1], int))
+        self.assertTrue(isinstance(w[2], int))
+        self.assertTrue(w.dtype == 'object')
+
     def test_setitem_boolean(self):
         mask = self.series > self.series.median()