From 1582a7ba8c2b05c16f85a8d6a1d305cfa78b1198 Mon Sep 17 00:00:00 2001 From: Yixin Xiao Date: Wed, 8 Apr 2020 00:28:48 -0400 Subject: [PATCH 1/6] ENH: Support downcasting of nullable dtypes in to_numeric Converts extension array into numpy array before passing into allclose in maybe_downcast_numeric --- pandas/core/dtypes/cast.py | 7 ++----- pandas/core/tools/numeric.py | 1 - 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 223cc43d158e6..b4933f9f37d3c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -143,9 +143,7 @@ def maybe_downcast_to_dtype(result, dtype): else: dtype = "object" - dtype = np.dtype(dtype) - converted = maybe_downcast_numeric(result, dtype, do_round) if converted is not result: return converted @@ -210,9 +208,7 @@ def trans(x): # don't allow upcasts here (except if empty) if result.dtype.itemsize <= dtype.itemsize and result.size: return result - if is_bool_dtype(dtype) or is_integer_dtype(dtype): - if not result.size: # if we don't have any elements, just astype it return trans(result).astype(dtype) @@ -239,7 +235,8 @@ def trans(x): if (new_result == result).all(): return new_result else: - if np.allclose(new_result, result, rtol=0): + nd_result = np.array(result).astype(result[0].dtype) + if np.allclose(new_result, nd_result, rtol=0): return new_result elif ( diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index f4eb16602f8a0..cdda33ab6941b 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -159,7 +159,6 @@ def to_numeric(arg, errors="raise", downcast=None): # to a numerical dtype and if a downcast method has been specified if downcast is not None and is_numeric_dtype(values): typecodes = None - if downcast in ("integer", "signed"): typecodes = np.typecodes["Integer"] elif downcast == "unsigned" and (not len(values) or np.min(values) >= 0): From 306339864060d051c0f28285b0e1b5d0cf412a16 Mon 
Sep 17 00:00:00 2001 From: Yixin Xiao Date: Wed, 8 Apr 2020 13:27:17 -0400 Subject: [PATCH 2/6] ENH: test for GH 33013 --- pandas/tests/tools/test_to_numeric.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 263887a8ea36e..1e82dcbc569a4 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -649,3 +649,15 @@ def test_failure_to_convert_uint64_string_to_NaN(): ser = Series([32, 64, np.nan]) result = to_numeric(pd.Series(["32", "64", "uint64"]), errors="coerce") tm.assert_series_equal(result, ser) + + +def test_support_downcast_of_nullable_dtypes(): + # GH 33013 + try: + pd.to_numeric(pd.Series([1, 2, 3], dtype="Int32"), downcast="integer") + pd.to_numeric(pd.Series([1, 2, 3], dtype="Int64"), downcast="integer") + pd.to_numeric(pd.Series([1, 2], dtype="Int32"), downcast="signed") + pd.to_numeric(pd.Series([1, 2, 3], dtype="Int32"), downcast="float") + pd.to_numeric(pd.Series([1, 2, 3], dtype="Float32"), downcast="integer") + except TypeError: + pytest.fail("TypeError raised.") From 2c26aed4972390d43ccb8341147c853fb444ab41 Mon Sep 17 00:00:00 2001 From: Yixin Xiao Date: Wed, 8 Apr 2020 13:42:11 -0400 Subject: [PATCH 3/6] Comment to clarify change in maybe_downcast_numeric --- pandas/core/dtypes/cast.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b4933f9f37d3c..cf68bea852a00 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -235,6 +235,7 @@ def trans(x): if (new_result == result).all(): return new_result else: + # np.allclose raises TypeError on extension arrays nd_result = np.array(result).astype(result[0].dtype) if np.allclose(new_result, nd_result, rtol=0): return new_result From b5065219595dd87a5ab26f2ceabf0524f3324346 Mon Sep 17 00:00:00 2001 From: Yixin Xiao Date: Wed, 8 Apr 2020 13:51:02 -0400 Subject: [PATCH 4/6] Updated 
docstring of maybe_downcast_numeric --- pandas/core/dtypes/cast.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index cf68bea852a00..e1ead4afd2486 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -178,7 +178,8 @@ def maybe_downcast_to_dtype(result, dtype): def maybe_downcast_numeric(result, dtype, do_round: bool = False): """ - Subset of maybe_downcast_to_dtype restricted to numeric dtypes. + Subset of maybe_downcast_to_dtype restricted to numeric and + nullable dtypes. Parameters ---------- From 5501a99161752a1b756d3c24d00ea4dda5d59165 Mon Sep 17 00:00:00 2001 From: Yixin Xiao Date: Wed, 8 Apr 2020 16:14:25 -0400 Subject: [PATCH 5/6] Updated documentation for v1.1.0 --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index d283d4450e6bf..0ecec532d4340 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -88,6 +88,7 @@ Other enhancements - :class:`Series.str` now has a `fullmatch` method that matches a regular expression against the entire string in each row of the series, similar to `re.fullmatch` (:issue:`32806`). - :meth:`DataFrame.sample` will now also allow array-like and BitGenerator objects to be passed to ``random_state`` as seeds (:issue:`32503`) - :meth:`MultiIndex.union` will now raise `RuntimeWarning` if the object inside are unsortable, pass `sort=False` to suppress this warning (:issue:`33015`) +- :func:`to_numeric` will now support downcasting of nullable dtypes (:issue:`33013`) - .. 
--------------------------------------------------------------------------- From 159f96d0868ec79a4185e59609cbb0b7d7c622cb Mon Sep 17 00:00:00 2001 From: Yixin Xiao Date: Thu, 9 Apr 2020 12:47:22 -0400 Subject: [PATCH 6/6] modified test --- pandas/tests/tools/test_to_numeric.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 1e82dcbc569a4..0b24e5b0a843d 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -658,6 +658,5 @@ def test_support_downcast_of_nullable_dtypes(): pd.to_numeric(pd.Series([1, 2, 3], dtype="Int64"), downcast="integer") pd.to_numeric(pd.Series([1, 2], dtype="Int32"), downcast="signed") pd.to_numeric(pd.Series([1, 2, 3], dtype="Int32"), downcast="float") - pd.to_numeric(pd.Series([1, 2, 3], dtype="Float32"), downcast="integer") except TypeError: pytest.fail("TypeError raised.")