diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 83a70aa34fccf..e34330f18a52d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -245,6 +245,7 @@ Bug Fixes - Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`) - Bug in ``DataFrame`` construction in which unsigned 64-bit integer elements were being converted to objects (:issue:`14881`) - Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises error now with ``astype()`` for Series and DataFrames (:issue:`14265`) +- Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type ``decimal.Decimal`` (:issue:`14827`) - Bug in ``describe()`` when passing a numpy array which does not contain the median to the ``percentiles`` keyword argument (:issue:`14908`) diff --git a/pandas/lib.pyx b/pandas/lib.pyx index b09a1c2755a06..548a96780d37a 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -313,6 +313,7 @@ def isscalar(object val): - instances of datetime.datetime - instances of datetime.timedelta - Period + - instances of decimal.Decimal """ @@ -325,7 +326,8 @@ def isscalar(object val): or PyDate_Check(val) or PyDelta_Check(val) or PyTime_Check(val) - or util.is_period_object(val)) + or util.is_period_object(val) + or is_decimal(val)) def item_from_zerodim(object val): diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 5ac2c70bb1808..a8b694d7ba008 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -1,4 +1,5 @@ import sys +from decimal import Decimal cimport util from tslib import NaT, get_timezone from datetime import datetime, timedelta @@ -28,6 +29,10 @@ def is_bool(object obj): def is_complex(object obj): return util.is_complex_object(obj) + +def is_decimal(object obj): + return isinstance(obj, Decimal) + cpdef bint is_period(object val): """ Return a boolean if this is a Period object """ return util.is_period_object(val) @@ 
-673,6 +678,9 @@ def maybe_convert_numeric(object[:] values, set na_values, elif util.is_complex_object(val): complexes[i] = val seen_complex = True + elif is_decimal(val): + floats[i] = complexes[i] = val + seen_float = True else: try: status = floatify(val, &fval, &maybe_int) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index f9647721e3c5b..f808abcda9418 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -2,6 +2,7 @@ import locale import codecs import nose +import decimal import numpy as np from numpy import iinfo @@ -208,6 +209,46 @@ def test_numeric(self): res = to_numeric(s) tm.assert_series_equal(res, expected) + # GH 14827 + df = pd.DataFrame(dict( + a=[1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), '0.1'], + b=[1.0, 2.0, 3.0, 4.0], + )) + expected = pd.DataFrame(dict( + a=[1.2, 3.14, np.inf, 0.1], + b=[1.0, 2.0, 3.0, 4.0], + )) + + # Test to_numeric over one column + df_copy = df.copy() + df_copy['a'] = df_copy['a'].apply(to_numeric) + tm.assert_frame_equal(df_copy, expected) + + # Test to_numeric over multiple columns + df_copy = df.copy() + df_copy[['a', 'b']] = df_copy[['a', 'b']].apply(to_numeric) + tm.assert_frame_equal(df_copy, expected) + + def test_numeric_lists_and_arrays(self): + # Test to_numeric with embedded lists and arrays + df = pd.DataFrame(dict( + a=[[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1] + )) + df['a'] = df['a'].apply(to_numeric) + expected = pd.DataFrame(dict( + a=[[3.14, 1.0], 1.6, 0.1], + )) + tm.assert_frame_equal(df, expected) + + df = pd.DataFrame(dict( + a=[np.array([decimal.Decimal(3.14), 1.0]), 0.1] + )) + df['a'] = df['a'].apply(to_numeric) + expected = pd.DataFrame(dict( + a=[[3.14, 1.0], 0.1], + )) + tm.assert_frame_equal(df, expected) + def test_all_nan(self): s = pd.Series(['a', 'b', 'c']) res = to_numeric(s, errors='coerce') diff --git a/pandas/tools/util.py b/pandas/tools/util.py index b50bf9dc448bc..daecf3d093680 100644 --- 
a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -5,7 +5,10 @@ is_numeric_dtype, is_datetime_or_timedelta_dtype, is_list_like, - _ensure_object) + _ensure_object, + is_decimal, + is_scalar as isscalar) + from pandas.types.cast import _possibly_downcast_to_dtype import pandas as pd @@ -173,7 +176,9 @@ def to_numeric(arg, errors='raise', downcast=None): values = arg.values elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype='O') - elif np.isscalar(arg): + elif isscalar(arg): + if is_decimal(arg): + return float(arg) if is_number(arg): return arg is_scalar = True diff --git a/pandas/types/inference.py b/pandas/types/inference.py index 35a2dc2fb831b..d2a2924b27659 100644 --- a/pandas/types/inference.py +++ b/pandas/types/inference.py @@ -18,6 +18,8 @@ is_scalar = lib.isscalar +is_decimal = lib.is_decimal + def is_number(obj): return isinstance(obj, (Number, np.number))