From 870769f043bc0a75093b8a45f16c8dbfe1ac998f Mon Sep 17 00:00:00 2001 From: Hector Lopez Date: Mon, 30 Aug 2021 18:56:22 -0500 Subject: [PATCH 1/8] Handle date arguments as objects --- pandas/core/tools/numeric.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 7d2bb75934c33..c188752d41d1e 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -137,6 +137,11 @@ def to_numeric(arg, errors="raise", downcast=None): if errors not in ("ignore", "raise", "coerce"): raise ValueError("invalid error value specified") + # Handle inputs of "date" type as objects + arg_dtype = getattr(arg, "dtype", None) + if is_datetime_or_timedelta_dtype(arg_dtype): + arg = arg._constructor(arg, dtype="O") + is_series = False is_index = False is_scalars = False From 1f0b361485c0503693a63ebddb45581381dd3d88 Mon Sep 17 00:00:00 2001 From: Hector Lopez Date: Mon, 30 Aug 2021 18:56:56 -0500 Subject: [PATCH 2/8] Add docstrings --- pandas/core/tools/numeric.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index c188752d41d1e..2da477bca6710 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -102,18 +102,22 @@ def to_numeric(arg, errors="raise", downcast=None): 1 2 2 -3 dtype: int8 - >>> s = pd.Series(['apple', '1.0', '2', -3]) + >>> s = pd.Series(['apple', '1.0', '2', -3, pd.to_datetime(0), pd.NaT]) >>> pd.to_numeric(s, errors='ignore') - 0 apple - 1 1.0 - 2 2 - 3 -3 + 0 apple + 1 1.0 + 2 2 + 3 -3 + 4 1970-01-01 00:00:00 + 5 NaT dtype: object >>> pd.to_numeric(s, errors='coerce') 0 NaN 1 1.0 2 2.0 3 -3.0 + 4 NaN + 5 NaN dtype: float64 Downcasting of nullable integer and floating dtypes is supported: From 08a7bb8edf01c7a64b3449fdb726e3975b225f39 Mon Sep 17 00:00:00 2001 From: Hector Lopez Date: Mon, 30 Aug 2021 20:12:09 -0500 Subject: [PATCH 3/8] Add support for numpy arrays and 
remove deprecated conversion --- pandas/core/tools/numeric.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index 2da477bca6710..104b625d08e45 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -144,7 +144,11 @@ def to_numeric(arg, errors="raise", downcast=None): # Handle inputs of "date" type as objects arg_dtype = getattr(arg, "dtype", None) if is_datetime_or_timedelta_dtype(arg_dtype): - arg = arg._constructor(arg, dtype="O") + try: + arg = arg._constructor(arg, dtype="O") + except AttributeError: + # when `arg` is a numpy array + arg = arg.astype("O") is_series = False is_index = False @@ -155,10 +159,7 @@ def to_numeric(arg, errors="raise", downcast=None): values = arg.values elif isinstance(arg, ABCIndex): is_index = True - if needs_i8_conversion(arg.dtype): - values = arg.asi8 - else: - values = arg.values + values = arg.values elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype="O") elif is_scalar(arg): From 05e1f2d69a00adf256949166732313fc8b1aa1f7 Mon Sep 17 00:00:00 2001 From: Hector Lopez Date: Mon, 30 Aug 2021 20:16:37 -0500 Subject: [PATCH 4/8] Remove tests that depend on the old behavior --- pandas/tests/tools/test_to_numeric.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 643a5617abbeb..7ac33bad4f592 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -450,7 +450,6 @@ def test_errors_invalid_value(): [ ["1", 2, 3], [1, 2, 3], - np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"), ], ) @pytest.mark.parametrize( @@ -478,7 +477,6 @@ def test_downcast_basic(data, kwargs, exp_dtype): [ ["1", 2, 3], [1, 2, 3], - np.array(["1970-01-02", "1970-01-03", "1970-01-04"], dtype="datetime64[D]"), ], ) def test_signed_downcast(data, signed_downcast): From 
5a5fd7747bbfb577c2e3f2bb309ae99a7e7a4f71 Mon Sep 17 00:00:00 2001 From: Hector Lopez Date: Mon, 30 Aug 2021 20:52:53 -0500 Subject: [PATCH 5/8] Add test_type_error and generalize test_ignore_error --- pandas/tests/tools/test_to_numeric.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 7ac33bad4f592..d9ae910f2d05f 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -111,10 +111,28 @@ def test_error(data, msg): @pytest.mark.parametrize( - "errors,exp_data", [("ignore", [1, -3.14, "apple"]), ("coerce", [1, -3.14, np.nan])] + "data,msg", + [ + ([22.06, "-86", pd.NaT], "Invalid object type at position 2"), + ([pd.to_datetime(0), 22.06, "-86", pd.NaT], "Invalid object type at position 0"), + ], +) +def test_type_error(data, msg): + ser = Series(data) + + with pytest.raises(TypeError, match=msg): + to_numeric(ser, errors="raise") + + +@pytest.mark.parametrize( + "errors,exp_data", + [ + ("ignore", [1, -3.14, "apple", pd.to_datetime(0), pd.NaT]), + ("coerce", [1, -3.14, np.nan, np.nan, np.nan]) + ], ) def test_ignore_error(errors, exp_data): - ser = Series([1, -3.14, "apple"]) + ser = Series([1, -3.14, "apple", pd.to_datetime(0), pd.NaT]) result = to_numeric(ser, errors=errors) expected = Series(exp_data) From f844dc46cb5d52bd30630a7de28b10c36412bd3f Mon Sep 17 00:00:00 2001 From: Hector Lopez Date: Mon, 30 Aug 2021 20:56:41 -0500 Subject: [PATCH 6/8] Add test_list_series Test that the function returns the same results for list and pd.Series --- pandas/tests/tools/test_to_numeric.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index d9ae910f2d05f..308e0add4b72f 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -77,6 +77,30 @@ def 
test_series(last_val): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize( + "list_data,kwargs", + [ + (["-3.14", 7], {}), + ( + ["-3.14", 7, pd.to_datetime(0), pd.NaT, ["30", -10]], + {"errors": "coerce"} + ), + ( + ["-3.14", 7, pd.to_datetime(0), pd.NaT, ["30", -10]], + {"errors": "ignore"} + ), + ] +) +def test_list_series(list_data, kwargs): + lis = list_data + ser = Series(list_data) + + result = to_numeric(lis, **kwargs) + expected = to_numeric(ser, **kwargs).values + + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize( "data", [ From ffc1a6e9c31a4cc1e79d516d01df8b4968336682 Mon Sep 17 00:00:00 2001 From: Hector Lopez Date: Mon, 30 Aug 2021 20:58:35 -0500 Subject: [PATCH 7/8] PEP 8 --- pandas/tests/tools/test_to_numeric.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 308e0add4b72f..219f7b309ca15 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -138,7 +138,10 @@ def test_error(data, msg): "data,msg", [ ([22.06, "-86", pd.NaT], "Invalid object type at position 2"), - ([pd.to_datetime(0), 22.06, "-86", pd.NaT], "Invalid object type at position 0"), + ( + [pd.to_datetime(0), 22.06, "-86", pd.NaT], + "Invalid object type at position 0" + ), ], ) def test_type_error(data, msg): From bcf1cca28ed7d9ff677eb405466baf1ed21dfd9e Mon Sep 17 00:00:00 2001 From: Hector Lopez Date: Tue, 31 Aug 2021 09:35:10 -0500 Subject: [PATCH 8/8] Remove failing tests that depend on the old behavior --- pandas/tests/tools/test_to_numeric.py | 33 --------------------------- 1 file changed, 33 deletions(-) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 219f7b309ca15..96afbff7dfbb7 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -417,39 +417,6 @@ def test_str(data, exp, transform_assert_equal): 
assert_equal(result, expected) -def test_datetime_like(tz_naive_fixture, transform_assert_equal): - transform, assert_equal = transform_assert_equal - idx = pd.date_range("20130101", periods=3, tz=tz_naive_fixture) - - result = to_numeric(transform(idx)) - expected = transform(idx.asi8) - assert_equal(result, expected) - - -def test_timedelta(transform_assert_equal): - transform, assert_equal = transform_assert_equal - idx = pd.timedelta_range("1 days", periods=3, freq="D") - - result = to_numeric(transform(idx)) - expected = transform(idx.asi8) - assert_equal(result, expected) - - -def test_period(transform_assert_equal): - transform, assert_equal = transform_assert_equal - - idx = pd.period_range("2011-01", periods=3, freq="M", name="") - inp = transform(idx) - - if isinstance(inp, Index): - result = to_numeric(inp) - expected = transform(idx.asi8) - assert_equal(result, expected) - else: - # TODO: PeriodDtype, so support it in to_numeric. - pytest.skip("Missing PeriodDtype support in to_numeric") - - @pytest.mark.parametrize( "errors,expected", [