Skip to content

Commit 14ee720

Browse files
Roger Thomas, ShaharBental
Roger Thomas
authored and committed
BUG: Fix to numeric on decimal fields
closes pandas-dev#14827

Author: Roger Thomas <[email protected]>
Author: Roger Thomas <[email protected]>

Closes pandas-dev#14842 from RogerThomas/fix_to_numeric_on_decimal_fields and squashes the following commits:

91d989b [Roger Thomas] Merge branch 'master' of github.com:pandas-dev/pandas into fix_to_numeric_on_decimal_fields
d7972d7 [Roger Thomas] Move isdecimal to internal api
1f1c62c [Roger Thomas] Add Test And Refactor is_decimal
f1b69da [Roger Thomas] Merge branch 'master' of github.com:pandas-dev/pandas into fix_to_numeric_on_decimal_fields
2d2488c [Roger Thomas] Fix To Numeric on Decimal Fields
1 parent 5c5d04d commit 14ee720

File tree

6 files changed

+62
-3
lines changed

6 files changed

+62
-3
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -245,6 +245,7 @@ Bug Fixes
245245
- Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`)
246246
- Bug in ``DataFrame`` construction in which unsigned 64-bit integer elements were being converted to objects (:issue:`14881`)
247247
- Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises an error with ``astype()`` for Series and DataFrames (:issue:`14265`)
248+
- Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type ``decimal.Decimal`` (:issue:`14827`)
248249
- Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`)
249250

250251

pandas/lib.pyx

+3-1
Original file line number | Diff line number | Diff line change
@@ -313,6 +313,7 @@ def isscalar(object val):
313313
- instances of datetime.datetime
314314
- instances of datetime.timedelta
315315
- Period
316+
- instances of decimal.Decimal
316317
317318
"""
318319

@@ -325,7 +326,8 @@ def isscalar(object val):
325326
or PyDate_Check(val)
326327
or PyDelta_Check(val)
327328
or PyTime_Check(val)
328-
or util.is_period_object(val))
329+
or util.is_period_object(val)
330+
or is_decimal(val))
329331

330332

331333
def item_from_zerodim(object val):

pandas/src/inference.pyx

+8
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import sys
2+
from decimal import Decimal
23
cimport util
34
from tslib import NaT, get_timezone
45
from datetime import datetime, timedelta
@@ -28,6 +29,10 @@ def is_bool(object obj):
2829
def is_complex(object obj):
2930
return util.is_complex_object(obj)
3031

32+
33+
def is_decimal(object obj):
34+
return isinstance(obj, Decimal)
35+
3136
cpdef bint is_period(object val):
3237
""" Return a boolean if this is a Period object """
3338
return util.is_period_object(val)
@@ -673,6 +678,9 @@ def maybe_convert_numeric(object[:] values, set na_values,
673678
elif util.is_complex_object(val):
674679
complexes[i] = val
675680
seen_complex = True
681+
elif is_decimal(val):
682+
floats[i] = complexes[i] = val
683+
seen_float = True
676684
else:
677685
try:
678686
status = floatify(val, &fval, &maybe_int)

pandas/tools/tests/test_util.py

+41
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import locale
33
import codecs
44
import nose
5+
import decimal
56

67
import numpy as np
78
from numpy import iinfo
@@ -208,6 +209,46 @@ def test_numeric(self):
208209
res = to_numeric(s)
209210
tm.assert_series_equal(res, expected)
210211

212+
# GH 14827
213+
df = pd.DataFrame(dict(
214+
a=[1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), '0.1'],
215+
b=[1.0, 2.0, 3.0, 4.0],
216+
))
217+
expected = pd.DataFrame(dict(
218+
a=[1.2, 3.14, np.inf, 0.1],
219+
b=[1.0, 2.0, 3.0, 4.0],
220+
))
221+
222+
# Test to_numeric over one column
223+
df_copy = df.copy()
224+
df_copy['a'] = df_copy['a'].apply(to_numeric)
225+
tm.assert_frame_equal(df_copy, expected)
226+
227+
# Test to_numeric over multiple columns
228+
df_copy = df.copy()
229+
df_copy[['a', 'b']] = df_copy[['a', 'b']].apply(to_numeric)
230+
tm.assert_frame_equal(df_copy, expected)
231+
232+
def test_numeric_lists_and_arrays(self):
233+
# Test to_numeric with embedded lists and arrays
234+
df = pd.DataFrame(dict(
235+
a=[[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1]
236+
))
237+
df['a'] = df['a'].apply(to_numeric)
238+
expected = pd.DataFrame(dict(
239+
a=[[3.14, 1.0], 1.6, 0.1],
240+
))
241+
tm.assert_frame_equal(df, expected)
242+
243+
df = pd.DataFrame(dict(
244+
a=[np.array([decimal.Decimal(3.14), 1.0]), 0.1]
245+
))
246+
df['a'] = df['a'].apply(to_numeric)
247+
expected = pd.DataFrame(dict(
248+
a=[[3.14, 1.0], 0.1],
249+
))
250+
tm.assert_frame_equal(df, expected)
251+
211252
def test_all_nan(self):
212253
s = pd.Series(['a', 'b', 'c'])
213254
res = to_numeric(s, errors='coerce')

pandas/tools/util.py

+7-2
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,10 @@
55
is_numeric_dtype,
66
is_datetime_or_timedelta_dtype,
77
is_list_like,
8-
_ensure_object)
8+
_ensure_object,
9+
is_decimal,
10+
is_scalar as isscalar)
11+
912
from pandas.types.cast import _possibly_downcast_to_dtype
1013

1114
import pandas as pd
@@ -173,7 +176,9 @@ def to_numeric(arg, errors='raise', downcast=None):
173176
values = arg.values
174177
elif isinstance(arg, (list, tuple)):
175178
values = np.array(arg, dtype='O')
176-
elif np.isscalar(arg):
179+
elif isscalar(arg):
180+
if is_decimal(arg):
181+
return float(arg)
177182
if is_number(arg):
178183
return arg
179184
is_scalar = True

pandas/types/inference.py

+2
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,8 @@
1818

1919
is_scalar = lib.isscalar
2020

21+
is_decimal = lib.is_decimal
22+
2123

2224
def is_number(obj):
2325
return isinstance(obj, (Number, np.number))

0 commit comments

Comments
 (0)