diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 02c7ac150c6af..b0169dd8ac896 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -291,6 +291,7 @@ Bug Fixes - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) - Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`) - Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`) +- Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly cast (:issue:`14941`, :issue:`15005`) - Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index f808abcda9418..8a8960a057926 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -426,12 +426,16 @@ def test_downcast(self): # cannot cast to an integer (signed or unsigned) # because we have a float number - data = ['1.1', 2, 3] - expected = np.array([1.1, 2, 3], dtype=np.float64) + data = (['1.1', 2, 3], + [10000.0, 20000, 3000, 40000.36, 50000, 50000.00]) + expected = (np.array([1.1, 2, 3], dtype=np.float64), + np.array([10000.0, 20000, 3000, + 40000.36, 50000, 50000.00], dtype=np.float64)) - for downcast in ('integer', 'signed', 'unsigned'): - res = pd.to_numeric(data, downcast=downcast) - tm.assert_numpy_array_equal(res, expected) + for _data, _expected in zip(data, expected): + for downcast in ('integer', 'signed', 'unsigned'): + res = pd.to_numeric(_data, downcast=downcast) + tm.assert_numpy_array_equal(res, _expected) # the smallest integer dtype need not be np.(u)int8 data = ['256', 257, 258] @@ -459,8 +463,7 @@ def test_downcast_limits(self): ('uint8', u, [iinfo(np.uint8).min, 
iinfo(np.uint8).max]), ('uint16', u, [iinfo(np.uint16).min, iinfo(np.uint16).max]), ('uint32', u, [iinfo(np.uint32).min, iinfo(np.uint32).max]), - # Test will be skipped until there is more uint64 support. - # ('uint64', u, [iinfo(uint64).min, iinfo(uint64).max]), + ('uint64', u, [iinfo(np.uint64).min, iinfo(np.uint64).max]), ('int16', i, [iinfo(np.int8).min, iinfo(np.int8).max + 1]), ('int32', i, [iinfo(np.int16).min, iinfo(np.int16).max + 1]), ('int64', i, [iinfo(np.int32).min, iinfo(np.int32).max + 1]), @@ -469,8 +472,7 @@ def test_downcast_limits(self): ('int64', i, [iinfo(np.int32).min - 1, iinfo(np.int64).max]), ('uint16', u, [iinfo(np.uint8).min, iinfo(np.uint8).max + 1]), ('uint32', u, [iinfo(np.uint16).min, iinfo(np.uint16).max + 1]), - # Test will be skipped until there is more uint64 support. - # ('uint64', u, [iinfo(np.uint32).min, iinfo(np.uint32).max + 1]), + ('uint64', u, [iinfo(np.uint32).min, iinfo(np.uint32).max + 1]) ] for dtype, downcast, min_max in dtype_downcast_min_max: diff --git a/pandas/tools/util.py b/pandas/tools/util.py index daecf3d093680..381e29283d417 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -225,7 +225,7 @@ def to_numeric(arg, errors='raise', downcast=None): if typecodes is not None: # from smallest to largest for dtype in typecodes: - if np.dtype(dtype).itemsize < values.dtype.itemsize: + if np.dtype(dtype).itemsize <= values.dtype.itemsize: values = _possibly_downcast_to_dtype( values, dtype) diff --git a/pandas/types/cast.py b/pandas/types/cast.py index ff4fb73d6a9b6..6b1c3f9c00351 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -101,8 +101,8 @@ def trans(x): # noqa arr = np.array([r[0]]) # if we have any nulls, then we are done - if isnull(arr).any() or not np.allclose(arr, - trans(arr).astype(dtype)): + if (isnull(arr).any() or + not np.allclose(arr, trans(arr).astype(dtype), rtol=0)): return result # a comparable, e.g. 
a Decimal may slip in here @@ -114,7 +114,7 @@ def trans(x): # noqa notnull(result).all()): new_result = trans(result).astype(dtype) try: - if np.allclose(new_result, result): + if np.allclose(new_result, result, rtol=0): return new_result except: