From 2d2488cbf488680c141df993c5e2b9a61b137069 Mon Sep 17 00:00:00 2001 From: Roger Thomas Date: Fri, 9 Dec 2016 14:38:14 +0000 Subject: [PATCH 1/3] Fix To Numeric on Decimal Fields --- doc/source/whatsnew/v0.20.0.txt | 2 ++ pandas/src/inference.pyx | 4 ++++ pandas/tools/tests/test_util.py | 21 +++++++++++++++++++++ pandas/tools/util.py | 3 +++ 4 files changed, 30 insertions(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index aeafc76876bbd..4d12d32084bf4 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -109,3 +109,5 @@ Performance Improvements Bug Fixes ~~~~~~~~~ + +- Bug in ``DataFrame(..).apply(to_numeric)`` when values are of type ``decimal.Decimal``. (:issue:`14827`) diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 5ac2c70bb1808..66c4da65ca52e 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -1,3 +1,4 @@ +from decimal import Decimal import sys cimport util from tslib import NaT, get_timezone @@ -673,6 +674,9 @@ def maybe_convert_numeric(object[:] values, set na_values, elif util.is_complex_object(val): complexes[i] = val seen_complex = True + elif isinstance(val, Decimal): + floats[i] = complexes[i] = val + seen_float = True else: try: status = floatify(val, &fval, &maybe_int) diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index f9647721e3c5b..2575b8c77419a 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -2,6 +2,7 @@ import locale import codecs import nose +import decimal import numpy as np from numpy import iinfo @@ -208,6 +209,26 @@ def test_numeric(self): res = to_numeric(s) tm.assert_series_equal(res, expected) + # GH 14827 + df = pd.DataFrame(dict( + a=[1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), '0.1'], + b=[1.0, 2.0, 3.0, 4.0], + )) + expected = pd.DataFrame(dict( + a=[1.2, 3.14, np.inf, 0.1], + b=[1.0, 2.0, 3.0, 4.0], + )) + + # Test to_numeric over one column 
+ df_copy = df.copy() + df_copy['a'] = df_copy['a'].apply(to_numeric) + tm.assert_frame_equal(df_copy, expected) + + # Test to_numeric over multiple columns + df_copy = df.copy() + df_copy[['a', 'b']] = df_copy[['a', 'b']].apply(to_numeric) + tm.assert_frame_equal(df_copy, expected) + def test_all_nan(self): s = pd.Series(['a', 'b', 'c']) res = to_numeric(s, errors='coerce') diff --git a/pandas/tools/util.py b/pandas/tools/util.py index b50bf9dc448bc..84f21c01ab312 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -1,3 +1,4 @@ +from decimal import Decimal import numpy as np import pandas.lib as lib @@ -173,6 +174,8 @@ def to_numeric(arg, errors='raise', downcast=None): values = arg.values elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype='O') + elif isinstance(arg, Decimal): + return float(arg) elif np.isscalar(arg): if is_number(arg): return arg From 1f1c62ce6d2189986b109d0d251c0fff992ac239 Mon Sep 17 00:00:00 2001 From: Roger Thomas Date: Mon, 19 Dec 2016 17:43:39 +0000 Subject: [PATCH 2/3] Add Test And Refactor is_decimal --- pandas/lib.pyx | 4 +++- pandas/src/inference.pyx | 8 ++++++-- pandas/tools/tests/test_util.py | 20 ++++++++++++++++++++ pandas/tools/util.py | 8 ++++---- 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/pandas/lib.pyx b/pandas/lib.pyx index b09a1c2755a06..548a96780d37a 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -313,6 +313,7 @@ def isscalar(object val): - instances of datetime.datetime - instances of datetime.timedelta - Period + - instances of decimal.Decimal """ @@ -325,7 +326,8 @@ def isscalar(object val): or PyDate_Check(val) or PyDelta_Check(val) or PyTime_Check(val) - or util.is_period_object(val)) + or util.is_period_object(val) + or is_decimal(val)) def item_from_zerodim(object val): diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 66c4da65ca52e..a8b694d7ba008 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -1,5 +1,5 @@ -from decimal 
import Decimal import sys +from decimal import Decimal cimport util from tslib import NaT, get_timezone from datetime import datetime, timedelta @@ -29,6 +29,10 @@ def is_bool(object obj): def is_complex(object obj): return util.is_complex_object(obj) + +def is_decimal(object obj): + return isinstance(obj, Decimal) + cpdef bint is_period(object val): """ Return a boolean if this is a Period object """ return util.is_period_object(val) @@ -674,7 +678,7 @@ def maybe_convert_numeric(object[:] values, set na_values, elif util.is_complex_object(val): complexes[i] = val seen_complex = True - elif isinstance(val, Decimal): + elif is_decimal(val): floats[i] = complexes[i] = val seen_float = True else: diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py index 2575b8c77419a..f808abcda9418 100644 --- a/pandas/tools/tests/test_util.py +++ b/pandas/tools/tests/test_util.py @@ -229,6 +229,26 @@ def test_numeric(self): df_copy[['a', 'b']] = df_copy[['a', 'b']].apply(to_numeric) tm.assert_frame_equal(df_copy, expected) + def test_numeric_lists_and_arrays(self): + # Test to_numeric with embedded lists and arrays + df = pd.DataFrame(dict( + a=[[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1] + )) + df['a'] = df['a'].apply(to_numeric) + expected = pd.DataFrame(dict( + a=[[3.14, 1.0], 1.6, 0.1], + )) + tm.assert_frame_equal(df, expected) + + df = pd.DataFrame(dict( + a=[np.array([decimal.Decimal(3.14), 1.0]), 0.1] + )) + df['a'] = df['a'].apply(to_numeric) + expected = pd.DataFrame(dict( + a=[[3.14, 1.0], 0.1], + )) + tm.assert_frame_equal(df, expected) + def test_all_nan(self): s = pd.Series(['a', 'b', 'c']) res = to_numeric(s, errors='coerce') diff --git a/pandas/tools/util.py b/pandas/tools/util.py index 84f21c01ab312..f539e62b64343 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -1,4 +1,3 @@ -from decimal import Decimal import numpy as np import pandas.lib as lib @@ -7,6 +6,7 @@ is_datetime_or_timedelta_dtype, is_list_like, 
_ensure_object) + from pandas.types.cast import _possibly_downcast_to_dtype import pandas as pd @@ -174,9 +174,9 @@ def to_numeric(arg, errors='raise', downcast=None): values = arg.values elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype='O') - elif isinstance(arg, Decimal): - return float(arg) - elif np.isscalar(arg): + elif lib.isscalar(arg): + if lib.is_decimal(arg): + return float(arg) if is_number(arg): return arg is_scalar = True From d7972d74c34cf0f4388162cb71e7b31de0abdb77 Mon Sep 17 00:00:00 2001 From: Roger Thomas Date: Mon, 19 Dec 2016 20:02:21 +0000 Subject: [PATCH 3/3] Move is_decimal to internal API --- pandas/tools/util.py | 8 +++++--- pandas/types/inference.py | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/tools/util.py b/pandas/tools/util.py index f539e62b64343..daecf3d093680 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -5,7 +5,9 @@ is_numeric_dtype, is_datetime_or_timedelta_dtype, is_list_like, - _ensure_object) + _ensure_object, + is_decimal, + is_scalar as isscalar) from pandas.types.cast import _possibly_downcast_to_dtype @@ -174,8 +176,8 @@ def to_numeric(arg, errors='raise', downcast=None): values = arg.values elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype='O') - elif lib.isscalar(arg): - if lib.is_decimal(arg): + elif isscalar(arg): + if is_decimal(arg): return float(arg) if is_number(arg): return arg diff --git a/pandas/types/inference.py b/pandas/types/inference.py index 35a2dc2fb831b..d2a2924b27659 100644 --- a/pandas/types/inference.py +++ b/pandas/types/inference.py @@ -18,6 +18,8 @@ is_scalar = lib.isscalar +is_decimal = lib.is_decimal + def is_number(obj): return isinstance(obj, (Number, np.number))