Skip to content

Fix to numeric on decimal fields #14842

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ Bug Fixes
~~~~~~~~~

- Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. ``astype()`` now raises an error for Series and DataFrames (:issue:`14265`)
- Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type ``decimal.Decimal`` (:issue:`14827`)



Expand Down
4 changes: 3 additions & 1 deletion pandas/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ def isscalar(object val):
- instances of datetime.datetime
- instances of datetime.timedelta
- Period
- instances of decimal.Decimal

"""

Expand All @@ -325,7 +326,8 @@ def isscalar(object val):
or PyDate_Check(val)
or PyDelta_Check(val)
or PyTime_Check(val)
or util.is_period_object(val))
or util.is_period_object(val)
or is_decimal(val))


def item_from_zerodim(object val):
Expand Down
8 changes: 8 additions & 0 deletions pandas/src/inference.pyx
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys
from decimal import Decimal
cimport util
from tslib import NaT, get_timezone
from datetime import datetime, timedelta
Expand Down Expand Up @@ -28,6 +29,10 @@ def is_bool(object obj):
def is_complex(object obj):
    """ Return the result of ``util.is_complex_object`` for ``obj`` """
    return util.is_complex_object(obj)


cpdef bint is_decimal(object obj):
    """
    Return a boolean if this is a ``decimal.Decimal`` instance.

    Declared ``cpdef bint`` so that Cython callers get a direct C-level
    call and a C boolean, while the function stays callable from Python.

    Parameters
    ----------
    obj : object
        The value to check.

    Returns
    -------
    bint
        True if ``obj`` is an instance of ``decimal.Decimal`` (subclasses
        included, since the check uses ``isinstance``), otherwise False.
    """
    return isinstance(obj, Decimal)

cpdef bint is_period(object val):
    """
    Check ``val`` with ``util.is_period_object`` and return the outcome
    as a boolean.
    """
    cdef bint matched = util.is_period_object(val)
    return matched
Expand Down Expand Up @@ -673,6 +678,9 @@ def maybe_convert_numeric(object[:] values, set na_values,
elif util.is_complex_object(val):
complexes[i] = val
seen_complex = True
elif is_decimal(val):
floats[i] = complexes[i] = val
seen_float = True
else:
try:
status = floatify(val, &fval, &maybe_int)
Expand Down
41 changes: 41 additions & 0 deletions pandas/tools/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import locale
import codecs
import nose
import decimal

import numpy as np
from numpy import iinfo
Expand Down Expand Up @@ -208,6 +209,46 @@ def test_numeric(self):
res = to_numeric(s)
tm.assert_series_equal(res, expected)

# GH 14827
df = pd.DataFrame(dict(
a=[1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), '0.1'],
b=[1.0, 2.0, 3.0, 4.0],
))
expected = pd.DataFrame(dict(
a=[1.2, 3.14, np.inf, 0.1],
b=[1.0, 2.0, 3.0, 4.0],
))

# Test to_numeric over one column
df_copy = df.copy()
df_copy['a'] = df_copy['a'].apply(to_numeric)
tm.assert_frame_equal(df_copy, expected)

# Test to_numeric over multiple columns
df_copy = df.copy()
df_copy[['a', 'b']] = df_copy[['a', 'b']].apply(to_numeric)
tm.assert_frame_equal(df_copy, expected)

def test_numeric_lists_and_arrays(self):
    # Test to_numeric with embedded lists and arrays

    # Case 1: a cell holding a plain list next to scalar cells.
    list_frame = pd.DataFrame({
        'a': [[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1],
    })
    list_frame['a'] = list_frame['a'].apply(to_numeric)
    tm.assert_frame_equal(
        list_frame,
        pd.DataFrame({'a': [[3.14, 1.0], 1.6, 0.1]}),
    )

    # Case 2: a cell holding a numpy array next to a scalar cell.
    array_frame = pd.DataFrame({
        'a': [np.array([decimal.Decimal(3.14), 1.0]), 0.1],
    })
    array_frame['a'] = array_frame['a'].apply(to_numeric)
    tm.assert_frame_equal(
        array_frame,
        pd.DataFrame({'a': [[3.14, 1.0], 0.1]}),
    )

def test_all_nan(self):
s = pd.Series(['a', 'b', 'c'])
res = to_numeric(s, errors='coerce')
Expand Down
5 changes: 4 additions & 1 deletion pandas/tools/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
is_datetime_or_timedelta_dtype,
is_list_like,
_ensure_object)

from pandas.types.cast import _possibly_downcast_to_dtype

import pandas as pd
Expand Down Expand Up @@ -173,7 +174,9 @@ def to_numeric(arg, errors='raise', downcast=None):
values = arg.values
elif isinstance(arg, (list, tuple)):
values = np.array(arg, dtype='O')
elif np.isscalar(arg):
elif lib.isscalar(arg):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Import these from pandas.types.common (like everything else). You will probably have to fix is_decimal so that it can be imported as well (it should be imported into pandas.types.inference, which pandas.types.common subsequently imports). pandas.types.common is the internal API.

if lib.is_decimal(arg):
return float(arg)
if is_number(arg):
return arg
is_scalar = True
Expand Down