From a561ab3bcd86d251e65659ee0dfe717663f728d8 Mon Sep 17 00:00:00 2001 From: Mike Kutzma Date: Fri, 6 Mar 2020 23:44:57 -0500 Subject: [PATCH 01/10] [BUG] add consistency to_numeric on empty list to_numeric should work similarly on empty lists for downcast=unsigned/float/integer Addresses: GH32493 --- pandas/core/tools/numeric.py | 2 +- pandas/tests/tools/test_to_numeric.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 7d1e4bbd8fb05..4116904bc06e0 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -162,7 +162,7 @@ def to_numeric(arg, errors="raise", downcast=None): if downcast in ("integer", "signed"): typecodes = np.typecodes["Integer"] - elif downcast == "unsigned" and np.min(values) >= 0: + elif downcast == "unsigned" and np.min(values, initial=0) >= 0: typecodes = np.typecodes["UnsignedInteger"] elif downcast == "float": typecodes = np.typecodes["Float"] diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 19385e797467c..cc5c736f6eace 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -627,3 +627,12 @@ def test_non_coerce_uint64_conflict(errors, exp): else: result = to_numeric(ser, errors=errors) tm.assert_series_equal(result, ser) + + +def test_downcast_empty(): + dc_int = pd.to_numeric([], downcast="integer") + dc_float = pd.to_numeric([], downcast="float") + dc_us = pd.to_numeric([], downcast="unsigned") + + assert np.array_equal(dc_int, dc_float) + assert np.array_equal(dc_us, dc_float) From c7239d5d22d9346e24da13de720a5f79eb3eb551 Mon Sep 17 00:00:00 2001 From: Mike Kutzma Date: Sat, 7 Mar 2020 17:22:48 -0500 Subject: [PATCH 02/10] Add pytest.mark.parametrize to tests --- pandas/tests/tools/test_to_numeric.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index cc5c736f6eace..f5081894229a0 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -629,10 +629,10 @@ def test_non_coerce_uint64_conflict(errors, exp): tm.assert_series_equal(result, ser) -def test_downcast_empty(): - dc_int = pd.to_numeric([], downcast="integer") - dc_float = pd.to_numeric([], downcast="float") - dc_us = pd.to_numeric([], downcast="unsigned") +@pytest.mark.parametrize("dc1", ["integer", "float", "unsigned"]) +@pytest.mark.parametrize("dc2", ["integer", "float", "unsigned"]) +def test_downcast_empty(dc1, dc2): - assert np.array_equal(dc_int, dc_float) - assert np.array_equal(dc_us, dc_float) + assert np.array_equal( + pd.to_numeric([], downcast=dc1), pd.to_numeric([], downcast=dc2) + ) From b97e10c0bccf55a32277775bc9b352624fee15e6 Mon Sep 17 00:00:00 2001 From: Mike Kutzma Date: Sat, 7 Mar 2020 17:22:48 -0500 Subject: [PATCH 03/10] Add pytest.mark.parametrize to tests --- pandas/tests/tools/test_to_numeric.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index cc5c736f6eace..f5081894229a0 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -629,10 +629,10 @@ def test_non_coerce_uint64_conflict(errors, exp): tm.assert_series_equal(result, ser) -def test_downcast_empty(): - dc_int = pd.to_numeric([], downcast="integer") - dc_float = pd.to_numeric([], downcast="float") - dc_us = pd.to_numeric([], downcast="unsigned") +@pytest.mark.parametrize("dc1", ["integer", "float", "unsigned"]) +@pytest.mark.parametrize("dc2", ["integer", "float", "unsigned"]) +def test_downcast_empty(dc1, dc2): - assert np.array_equal(dc_int, dc_float) - assert np.array_equal(dc_us, dc_float) + assert np.array_equal( + pd.to_numeric([], downcast=dc1), pd.to_numeric([], downcast=dc2) + ) From cbac61f2a4acf1a5c839580ecc5a1ae65adc48cb Mon Sep 17 00:00:00 2001 From: Mike Kutzma Date: Tue, 10 Mar 2020 17:02:35 -0400 Subject: [PATCH 04/10] Use len(values) to avoid numpy compat issues --- pandas/core/tools/numeric.py | 2 +- pandas/tests/tools/test_to_numeric.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 4116904bc06e0..df048769a4ace 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -162,7 +162,7 @@ def to_numeric(arg, errors="raise", downcast=None): if downcast in ("integer", "signed"): typecodes = np.typecodes["Integer"] - elif downcast == "unsigned" and np.min(values, initial=0) >= 0: + elif downcast == "unsigned" and (len(values) == 0 or np.min(values) >= 0): typecodes = np.typecodes["UnsignedInteger"] elif downcast == "float": typecodes = np.typecodes["Float"] diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index f5081894229a0..780ab2aa02935 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -632,7 +632,10 @@ def test_non_coerce_uint64_conflict(errors, exp): @pytest.mark.parametrize("dc1", ["integer", "float", "unsigned"]) @pytest.mark.parametrize("dc2", ["integer", "float", "unsigned"]) def test_downcast_empty(dc1, dc2): + # GH32493 - assert np.array_equal( - pd.to_numeric([], downcast=dc1), pd.to_numeric([], downcast=dc2) + tm.assert_numpy_array_equal( + pd.to_numeric([], downcast=dc1), + pd.to_numeric([], downcast=dc2), + check_dtype=False, ) From b1ad5c14522d69d6b82b10d3d0bd3702cc4ea575 Mon Sep 17 00:00:00 2001 From: Mike Kutzma Date: Wed, 11 Mar 2020 17:53:15 -0400 Subject: [PATCH 05/10] Add whatsnew entry for 32493 --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e745bf3f5feed..bdddd22475749 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -232,6 +232,7 @@ Numeric ^^^^^^^ - Bug in :meth:`DataFrame.floordiv` with ``axis=0`` not treating division-by-zero like :meth:`Series.floordiv` (:issue:`31271`) - Bug in :meth:`to_numeric` with string argument ``"uint64"`` and ``errors="coerce"`` silently fails (:issue:`32394`) +- Bug in :meth:`to_numeric` with ``downcast="unsigned"`` fails for empty data (:issue:`32493`) - Conversion From e051ec93f733c4d6d8f722c90d9ad3d252dad546 Mon Sep 17 00:00:00 2001 From: Mike Kutzma Date: Fri, 6 Mar 2020 23:44:57 -0500 Subject: [PATCH 06/10] [BUG] add consistency to_numeric on empty list to_numeric should work similarly on empty lists for downcast=unsigned/float/integer Addresses: GH32493 --- pandas/core/tools/numeric.py | 2 +- pandas/tests/tools/test_to_numeric.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 7d1e4bbd8fb05..4116904bc06e0 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -162,7 +162,7 @@ def to_numeric(arg, errors="raise", downcast=None): if downcast in ("integer", "signed"): typecodes = np.typecodes["Integer"] - elif downcast == "unsigned" and np.min(values) >= 0: + elif downcast == "unsigned" and np.min(values, initial=0) >= 0: typecodes = np.typecodes["UnsignedInteger"] elif downcast == "float": typecodes = np.typecodes["Float"] diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 19385e797467c..cc5c736f6eace 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -627,3 +627,12 @@ def test_non_coerce_uint64_conflict(errors, exp): else: result = to_numeric(ser, errors=errors) tm.assert_series_equal(result, ser) + + +def test_downcast_empty(): + dc_int = pd.to_numeric([], downcast="integer") + dc_float = pd.to_numeric([], downcast="float") + dc_us = pd.to_numeric([], downcast="unsigned") + + assert np.array_equal(dc_int, dc_float) + assert np.array_equal(dc_us, dc_float) From 587f1423d8e6f4be501175bd99855bfe71d1b209 Mon Sep 17 00:00:00 2001 From: Mike Kutzma Date: Sat, 7 Mar 2020 17:22:48 -0500 Subject: [PATCH 07/10] Add pytest.mark.parametrize to tests --- pandas/tests/tools/test_to_numeric.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index cc5c736f6eace..f5081894229a0 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -629,10 +629,10 @@ def test_non_coerce_uint64_conflict(errors, exp): tm.assert_series_equal(result, ser) -def test_downcast_empty(): - dc_int = pd.to_numeric([], downcast="integer") - dc_float = pd.to_numeric([], downcast="float") - dc_us = pd.to_numeric([], downcast="unsigned") +@pytest.mark.parametrize("dc1", ["integer", "float", "unsigned"]) +@pytest.mark.parametrize("dc2", ["integer", "float", "unsigned"]) +def test_downcast_empty(dc1, dc2): - assert np.array_equal(dc_int, dc_float) - assert np.array_equal(dc_us, dc_float) + assert np.array_equal( + pd.to_numeric([], downcast=dc1), pd.to_numeric([], downcast=dc2) + ) From fa6a27186d770cc1b68cf5eb7c0973e7d66c78ea Mon Sep 17 00:00:00 2001 From: Mike Kutzma Date: Tue, 10 Mar 2020 17:02:35 -0400 Subject: [PATCH 08/10] Use len(values) to avoid numpy compat issues --- pandas/core/tools/numeric.py | 2 +- pandas/tests/tools/test_to_numeric.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 4116904bc06e0..df048769a4ace 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -162,7 +162,7 @@ def to_numeric(arg, errors="raise", downcast=None): if downcast in ("integer", "signed"): typecodes = np.typecodes["Integer"] - elif downcast == "unsigned" and np.min(values, initial=0) >= 0: + elif downcast == "unsigned" and (len(values) == 0 or np.min(values) >= 0): typecodes = np.typecodes["UnsignedInteger"] elif downcast == "float": typecodes = np.typecodes["Float"] diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index f5081894229a0..780ab2aa02935 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -632,7 +632,10 @@ def test_non_coerce_uint64_conflict(errors, exp): @pytest.mark.parametrize("dc1", ["integer", "float", "unsigned"]) @pytest.mark.parametrize("dc2", ["integer", "float", "unsigned"]) def test_downcast_empty(dc1, dc2): + # GH32493 - assert np.array_equal( - pd.to_numeric([], downcast=dc1), pd.to_numeric([], downcast=dc2) + tm.assert_numpy_array_equal( + pd.to_numeric([], downcast=dc1), + pd.to_numeric([], downcast=dc2), + check_dtype=False, ) From e19371e99d7f8d75b54e8692bce3b0bee9511f6d Mon Sep 17 00:00:00 2001 From: Mike Kutzma Date: Wed, 11 Mar 2020 17:53:15 -0400 Subject: [PATCH 09/10] Add whatsnew entry for 32493 --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9a3c6ebdc7284..5c6b7149a9462 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -266,6 +266,7 @@ Numeric ^^^^^^^ - Bug in :meth:`DataFrame.floordiv` with ``axis=0`` not treating division-by-zero like :meth:`Series.floordiv` (:issue:`31271`) - Bug in :meth:`to_numeric` with string argument ``"uint64"`` and ``errors="coerce"`` silently fails (:issue:`32394`) +- Bug in :meth:`to_numeric` with ``downcast="unsigned"`` fails for empty data (:issue:`32493`) - Conversion From b6f44e4f52d776ffc3013c68fefee6749b50319e Mon Sep 17 00:00:00 2001 From: Mike Kutzma Date: Sat, 14 Mar 2020 20:40:14 -0400 Subject: [PATCH 10/10] Use not len() over len==0 in to_numeric for unsigned --- pandas/core/tools/numeric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index df048769a4ace..a6198f8b752ae 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -162,7 +162,7 @@ def to_numeric(arg, errors="raise", downcast=None): if downcast in ("integer", "signed"): typecodes = np.typecodes["Integer"] - elif downcast == "unsigned" and (len(values) == 0 or np.min(values) >= 0): + elif downcast == "unsigned" and (not len(values) or np.min(values) >= 0): typecodes = np.typecodes["UnsignedInteger"] elif downcast == "float": typecodes = np.typecodes["Float"]