Skip to content

Commit 5353e59

Browse files
gfyoung authored and jreback committed
BUG: Patch float and uint handling in to_numeric
1) Patches `float` handling by reducing the "closeness" level when checking conversions. 2) Patches `uint` handling by allowing casts to `uint` dtypes of equal or lesser size to `int64` (when values are less than `INT64_MAX`). Closes #14941. Follow-up to #15005. Author: gfyoung <[email protected]> Closes #15024 from gfyoung/to-numeric-uint and squashes the following commits: 9e35819 [gfyoung] BUG: Patch float and uint handling in to_numeric
1 parent 0252385 commit 5353e59

File tree

4 files changed

+16
-13
lines changed

4 files changed

+16
-13
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ Bug Fixes
291291
- Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`)
292292
- Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`)
293293
- Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`)
294+
- Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly cast (:issue:`14941`, :issue:`15005`)
294295

295296

296297
- Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`)

pandas/tools/tests/test_util.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -426,12 +426,16 @@ def test_downcast(self):
426426

427427
# cannot cast to an integer (signed or unsigned)
428428
# because we have a float number
429-
data = ['1.1', 2, 3]
430-
expected = np.array([1.1, 2, 3], dtype=np.float64)
429+
data = (['1.1', 2, 3],
430+
[10000.0, 20000, 3000, 40000.36, 50000, 50000.00])
431+
expected = (np.array([1.1, 2, 3], dtype=np.float64),
432+
np.array([10000.0, 20000, 3000,
433+
40000.36, 50000, 50000.00], dtype=np.float64))
431434

432-
for downcast in ('integer', 'signed', 'unsigned'):
433-
res = pd.to_numeric(data, downcast=downcast)
434-
tm.assert_numpy_array_equal(res, expected)
435+
for _data, _expected in zip(data, expected):
436+
for downcast in ('integer', 'signed', 'unsigned'):
437+
res = pd.to_numeric(_data, downcast=downcast)
438+
tm.assert_numpy_array_equal(res, _expected)
435439

436440
# the smallest integer dtype need not be np.(u)int8
437441
data = ['256', 257, 258]
@@ -459,8 +463,7 @@ def test_downcast_limits(self):
459463
('uint8', u, [iinfo(np.uint8).min, iinfo(np.uint8).max]),
460464
('uint16', u, [iinfo(np.uint16).min, iinfo(np.uint16).max]),
461465
('uint32', u, [iinfo(np.uint32).min, iinfo(np.uint32).max]),
462-
# Test will be skipped until there is more uint64 support.
463-
# ('uint64', u, [iinfo(uint64).min, iinfo(uint64).max]),
466+
('uint64', u, [iinfo(np.uint64).min, iinfo(np.uint64).max]),
464467
('int16', i, [iinfo(np.int8).min, iinfo(np.int8).max + 1]),
465468
('int32', i, [iinfo(np.int16).min, iinfo(np.int16).max + 1]),
466469
('int64', i, [iinfo(np.int32).min, iinfo(np.int32).max + 1]),
@@ -469,8 +472,7 @@ def test_downcast_limits(self):
469472
('int64', i, [iinfo(np.int32).min - 1, iinfo(np.int64).max]),
470473
('uint16', u, [iinfo(np.uint8).min, iinfo(np.uint8).max + 1]),
471474
('uint32', u, [iinfo(np.uint16).min, iinfo(np.uint16).max + 1]),
472-
# Test will be skipped until there is more uint64 support.
473-
# ('uint64', u, [iinfo(np.uint32).min, iinfo(np.uint32).max + 1]),
475+
('uint64', u, [iinfo(np.uint32).min, iinfo(np.uint32).max + 1])
474476
]
475477

476478
for dtype, downcast, min_max in dtype_downcast_min_max:

pandas/tools/util.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ def to_numeric(arg, errors='raise', downcast=None):
225225
if typecodes is not None:
226226
# from smallest to largest
227227
for dtype in typecodes:
228-
if np.dtype(dtype).itemsize < values.dtype.itemsize:
228+
if np.dtype(dtype).itemsize <= values.dtype.itemsize:
229229
values = _possibly_downcast_to_dtype(
230230
values, dtype)
231231

pandas/types/cast.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,8 @@ def trans(x): # noqa
101101
arr = np.array([r[0]])
102102

103103
# if we have any nulls, then we are done
104-
if isnull(arr).any() or not np.allclose(arr,
105-
trans(arr).astype(dtype)):
104+
if (isnull(arr).any() or
105+
not np.allclose(arr, trans(arr).astype(dtype), rtol=0)):
106106
return result
107107

108108
# a comparable, e.g. a Decimal may slip in here
@@ -114,7 +114,7 @@ def trans(x): # noqa
114114
notnull(result).all()):
115115
new_result = trans(result).astype(dtype)
116116
try:
117-
if np.allclose(new_result, result):
117+
if np.allclose(new_result, result, rtol=0):
118118
return new_result
119119
except:
120120

0 commit comments

Comments
 (0)