Skip to content

Fix to numeric on decimal fields #14842

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ Bug Fixes
- Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`)
- Bug in ``DataFrame`` construction in which unsigned 64-bit integer elements were being converted to objects (:issue:`14881`)
- Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. ``astype()`` now raises an error for Series and DataFrames (:issue:`14265`)
- Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type ``decimal.Decimal`` (:issue:`14827`)
- Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`)


Expand Down
4 changes: 3 additions & 1 deletion pandas/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ def isscalar(object val):
- instances of datetime.datetime
- instances of datetime.timedelta
- Period
- instances of decimal.Decimal

"""

Expand All @@ -325,7 +326,8 @@ def isscalar(object val):
or PyDate_Check(val)
or PyDelta_Check(val)
or PyTime_Check(val)
or util.is_period_object(val))
or util.is_period_object(val)
or is_decimal(val))


def item_from_zerodim(object val):
Expand Down
8 changes: 8 additions & 0 deletions pandas/src/inference.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys
from decimal import Decimal
cimport util
from tslib import NaT, get_timezone
from datetime import datetime, timedelta
Expand Down Expand Up @@ -28,6 +29,10 @@ def is_bool(object obj):
def is_complex(object obj):
    """ Return whatever util.is_complex_object reports for obj """
    result = util.is_complex_object(obj)
    return result


cpdef bint is_decimal(object obj):
    """ Return a boolean if this is a decimal.Decimal object """
    # cpdef (like is_period) keeps this importable from Python while letting
    # Cython callers in this extension invoke it as a C-level function.
    return isinstance(obj, Decimal)

cpdef bint is_period(object val):
    """ Report whether val is a Period object, as decided by util.is_period_object """
    cdef bint matched = util.is_period_object(val)
    return matched
Expand Down Expand Up @@ -673,6 +678,9 @@ def maybe_convert_numeric(object[:] values, set na_values,
elif util.is_complex_object(val):
complexes[i] = val
seen_complex = True
elif is_decimal(val):
floats[i] = complexes[i] = val
seen_float = True
else:
try:
status = floatify(val, &fval, &maybe_int)
Expand Down
41 changes: 41 additions & 0 deletions pandas/tools/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import locale
import codecs
import nose
import decimal

import numpy as np
from numpy import iinfo
Expand Down Expand Up @@ -208,6 +209,46 @@ def test_numeric(self):
res = to_numeric(s)
tm.assert_series_equal(res, expected)

# GH 14827
df = pd.DataFrame(dict(
a=[1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), '0.1'],
b=[1.0, 2.0, 3.0, 4.0],
))
expected = pd.DataFrame(dict(
a=[1.2, 3.14, np.inf, 0.1],
b=[1.0, 2.0, 3.0, 4.0],
))

# Test to_numeric over one column
df_copy = df.copy()
df_copy['a'] = df_copy['a'].apply(to_numeric)
tm.assert_frame_equal(df_copy, expected)

# Test to_numeric over multiple columns
df_copy = df.copy()
df_copy[['a', 'b']] = df_copy[['a', 'b']].apply(to_numeric)
tm.assert_frame_equal(df_copy, expected)

def test_numeric_lists_and_arrays(self):
    """to_numeric applied element-wise over cells holding lists or arrays."""
    # Each case pairs the raw column contents with the values expected
    # after applying to_numeric to every cell of that column.
    cases = [
        # embedded list alongside scalar Decimal and float cells
        ([[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1],
         [[3.14, 1.0], 1.6, 0.1]),
        # embedded numpy array alongside a float cell
        ([np.array([decimal.Decimal(3.14), 1.0]), 0.1],
         [[3.14, 1.0], 0.1]),
    ]
    for raw, wanted in cases:
        df = pd.DataFrame(dict(a=raw))
        df['a'] = df['a'].apply(to_numeric)
        expected = pd.DataFrame(dict(a=wanted))
        tm.assert_frame_equal(df, expected)

def test_all_nan(self):
s = pd.Series(['a', 'b', 'c'])
res = to_numeric(s, errors='coerce')
Expand Down
9 changes: 7 additions & 2 deletions pandas/tools/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
is_numeric_dtype,
is_datetime_or_timedelta_dtype,
is_list_like,
_ensure_object)
_ensure_object,
is_decimal,
is_scalar as isscalar)

from pandas.types.cast import _possibly_downcast_to_dtype

import pandas as pd
Expand Down Expand Up @@ -173,7 +176,9 @@ def to_numeric(arg, errors='raise', downcast=None):
values = arg.values
elif isinstance(arg, (list, tuple)):
values = np.array(arg, dtype='O')
elif np.isscalar(arg):
elif isscalar(arg):
if is_decimal(arg):
return float(arg)
if is_number(arg):
return arg
is_scalar = True
Expand Down
2 changes: 2 additions & 0 deletions pandas/types/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

is_scalar = lib.isscalar

is_decimal = lib.is_decimal


def is_number(obj):
    """Return True if ``obj`` is an instance of ``Number`` or ``np.number``."""
    numeric_types = (Number, np.number)
    return isinstance(obj, numeric_types)
Expand Down