BUG: Patch float and uint handling in to_numeric

gfyoung · jreback · commit 5353e59447a9 · 2016-12-31T10:44:51.000-05:00
1) Patches `float` handling by tightening the "closeness" tolerance used when checking conversions (the relative tolerance is set to zero, i.e. `rtol=0`). 2) Patches `uint` handling by allowing casts to `uint` dtypes whose size is equal to or smaller than that of `int64` (when values are less than `INT64_MAX`). Closes #14941. Follow-up to #15005. Author: gfyoung <gfyoung17@gmail.com> Closes #15024 from gfyoung/to-numeric-uint and squashes the following commits: 9e35819 [gfyoung] BUG: Patch float and uint handling in to_numeric
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -291,6 +291,7 @@ Bug Fixes
 - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`)
 - Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`)
 - Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`)
+- Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly casted (:issue:`14941`, :issue:`15005`)
 
 
 - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`)
diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py
@@ -426,12 +426,16 @@ def test_downcast(self):
 
         # cannot cast to an integer (signed or unsigned)
         # because we have a float number
-        data = ['1.1', 2, 3]
-        expected = np.array([1.1, 2, 3], dtype=np.float64)
+        data = (['1.1', 2, 3],
+                [10000.0, 20000, 3000, 40000.36, 50000, 50000.00])
+        expected = (np.array([1.1, 2, 3], dtype=np.float64),
+                    np.array([10000.0, 20000, 3000,
+                              40000.36, 50000, 50000.00], dtype=np.float64))
 
-        for downcast in ('integer', 'signed', 'unsigned'):
-            res = pd.to_numeric(data, downcast=downcast)
-            tm.assert_numpy_array_equal(res, expected)
+        for _data, _expected in zip(data, expected):
+            for downcast in ('integer', 'signed', 'unsigned'):
+                res = pd.to_numeric(_data, downcast=downcast)
+                tm.assert_numpy_array_equal(res, _expected)
 
         # the smallest integer dtype need not be np.(u)int8
         data = ['256', 257, 258]
@@ -459,8 +463,7 @@ def test_downcast_limits(self):
             ('uint8', u, [iinfo(np.uint8).min, iinfo(np.uint8).max]),
             ('uint16', u, [iinfo(np.uint16).min, iinfo(np.uint16).max]),
             ('uint32', u, [iinfo(np.uint32).min, iinfo(np.uint32).max]),
-            # Test will be skipped until there is more uint64 support.
-            # ('uint64', u, [iinfo(uint64).min, iinfo(uint64).max]),
+            ('uint64', u, [iinfo(np.uint64).min, iinfo(np.uint64).max]),
             ('int16', i, [iinfo(np.int8).min, iinfo(np.int8).max + 1]),
             ('int32', i, [iinfo(np.int16).min, iinfo(np.int16).max + 1]),
             ('int64', i, [iinfo(np.int32).min, iinfo(np.int32).max + 1]),
@@ -469,8 +472,7 @@ def test_downcast_limits(self):
             ('int64', i, [iinfo(np.int32).min - 1, iinfo(np.int64).max]),
             ('uint16', u, [iinfo(np.uint8).min, iinfo(np.uint8).max + 1]),
             ('uint32', u, [iinfo(np.uint16).min, iinfo(np.uint16).max + 1]),
-            # Test will be skipped until there is more uint64 support.
-            # ('uint64', u, [iinfo(np.uint32).min, iinfo(np.uint32).max + 1]),
+            ('uint64', u, [iinfo(np.uint32).min, iinfo(np.uint32).max + 1])
         ]
 
         for dtype, downcast, min_max in dtype_downcast_min_max:
diff --git a/pandas/tools/util.py b/pandas/tools/util.py
@@ -225,7 +225,7 @@ def to_numeric(arg, errors='raise', downcast=None):
         if typecodes is not None:
             # from smallest to largest
             for dtype in typecodes:
-                if np.dtype(dtype).itemsize < values.dtype.itemsize:
+                if np.dtype(dtype).itemsize <= values.dtype.itemsize:
                     values = _possibly_downcast_to_dtype(
                         values, dtype)
 
diff --git a/pandas/types/cast.py b/pandas/types/cast.py
@@ -101,8 +101,8 @@ def trans(x):  # noqa
             arr = np.array([r[0]])
 
             # if we have any nulls, then we are done
-            if isnull(arr).any() or not np.allclose(arr,
-                                                    trans(arr).astype(dtype)):
+            if (isnull(arr).any() or
+                    not np.allclose(arr, trans(arr).astype(dtype), rtol=0)):
                 return result
 
             # a comparable, e.g. a Decimal may slip in here
@@ -114,7 +114,7 @@ def trans(x):  # noqa
                     notnull(result).all()):
                 new_result = trans(result).astype(dtype)
                 try:
-                    if np.allclose(new_result, result):
+                    if np.allclose(new_result, result, rtol=0):
                         return new_result
                 except: