From bea934d9113bc35361dcff53381dfe0f0e2f0adb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 18 Sep 2019 15:54:20 -0700 Subject: [PATCH 1/4] CLN: specific exceptions for maybe_convert_numeric --- pandas/core/dtypes/cast.py | 6 +-- pandas/core/tools/numeric.py | 23 +++++----- pandas/io/parsers.py | 13 +++--- pandas/tests/dtypes/test_inference.py | 61 +++++++++++++-------------- 4 files changed, 52 insertions(+), 51 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index ac9b57dc8d342..0e30e337187f7 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -875,7 +875,7 @@ def soft_convert_objects( if numeric and is_object_dtype(values.dtype): try: converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) - except Exception: + except (ValueError, TypeError): pass else: # If all NaNs, then do not-alter @@ -953,9 +953,9 @@ def try_datetime(v): # we might have a sequence of the same-datetimes with tz's # if so coerce to a DatetimeIndex; if they are not the same, # then these stay as object dtype, xref GH19671 + from pandas._libs.tslibs import conversion + from pandas import DatetimeIndex try: - from pandas._libs.tslibs import conversion - from pandas import DatetimeIndex values, tz = conversion.datetime_to_datetime64(v) return DatetimeIndex(values).tz_localize("UTC").tz_convert(tz=tz) diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py index a0e2c8d9cab65..fa33d11bda7eb 100644 --- a/pandas/core/tools/numeric.py +++ b/pandas/core/tools/numeric.py @@ -137,21 +137,20 @@ def to_numeric(arg, errors="raise", downcast=None): else: values = arg - try: - if is_numeric_dtype(values): - pass - elif is_datetime_or_timedelta_dtype(values): - values = values.astype(np.int64) - else: - values = ensure_object(values) - coerce_numeric = errors not in ("ignore", "raise") + if is_numeric_dtype(values): + pass + elif is_datetime_or_timedelta_dtype(values): + values = values.astype(np.int64) + else: + values = ensure_object(values) + coerce_numeric = errors not in ("ignore", "raise") + try: values = lib.maybe_convert_numeric( values, set(), coerce_numeric=coerce_numeric ) - - except Exception: - if errors == "raise": - raise + except (ValueError, TypeError): + if errors == "raise": + raise # attempt downcast only if the data has been successfully converted # to a numerical dtype and if a downcast method has been specified diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 72f1adf0aad3d..3678e32943b2e 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1782,14 +1782,17 @@ def _infer_types(self, values, na_values, try_num_bool=True): np.putmask(values, mask, np.nan) return values, na_count - if try_num_bool: + if try_num_bool and is_object_dtype(values.dtype): + # exclude e.g DatetimeIndex here try: result = lib.maybe_convert_numeric(values, na_values, False) - na_count = isna(result).sum() - except Exception: + except (ValueError, TypeError): + # e.g. encountering datetime string gets ValueError + # TypeError can be raised in floatify result = values - if values.dtype == np.object_: - na_count = parsers.sanitize_objects(result, na_values, False) + na_count = parsers.sanitize_objects(result, na_values, False) + else: + na_count = isna(result).sum() else: result = values if values.dtype == np.object_: diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 0a8707bdac3a0..81960bca6235e 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -379,9 +379,10 @@ def test_isinf_scalar(self): assert not libmissing.isneginf_scalar(1) assert not libmissing.isneginf_scalar("a") - def test_maybe_convert_numeric_infinities(self): + @pytest.mark.parametrize("maybe_int", [True, False]) + @pytest.mark.parametrize("infinity", ["inf", "inF", "iNf", "Inf", "iNF", "InF", "INf", "INF"]) + def test_maybe_convert_numeric_infinities(self, infinity, maybe_int): # see gh-13274 - infinities = ["inf", "inF", "iNf", "Inf", "iNF", "InF", "INf", "INF"] na_values = {"", "NULL", "nan"} pos = np.array(["inf"], dtype=np.float64) @@ -389,35 +390,33 @@ def test_maybe_convert_numeric_infinities(self): msg = "Unable to parse string" - for infinity in infinities: - for maybe_int in (True, False): - out = lib.maybe_convert_numeric( - np.array([infinity], dtype=object), na_values, maybe_int - ) - tm.assert_numpy_array_equal(out, pos) - - out = lib.maybe_convert_numeric( - np.array(["-" + infinity], dtype=object), na_values, maybe_int - ) - tm.assert_numpy_array_equal(out, neg) - - out = lib.maybe_convert_numeric( - np.array([infinity], dtype=object), na_values, maybe_int - ) - tm.assert_numpy_array_equal(out, pos) - - out = lib.maybe_convert_numeric( - np.array(["+" + infinity], dtype=object), na_values, maybe_int - ) - tm.assert_numpy_array_equal(out, pos) - - # too many characters - with pytest.raises(ValueError, match=msg): - lib.maybe_convert_numeric( - np.array(["foo_" + infinity], dtype=object), - na_values, - maybe_int, - ) + out = lib.maybe_convert_numeric( + np.array([infinity], dtype=object), na_values, maybe_int + ) + tm.assert_numpy_array_equal(out, pos) + + out = lib.maybe_convert_numeric( + np.array(["-" + infinity], dtype=object), na_values, maybe_int + ) + tm.assert_numpy_array_equal(out, neg) + + out = lib.maybe_convert_numeric( + np.array([infinity], dtype=object), na_values, maybe_int + ) + tm.assert_numpy_array_equal(out, pos) + + out = lib.maybe_convert_numeric( + np.array(["+" + infinity], dtype=object), na_values, maybe_int + ) + tm.assert_numpy_array_equal(out, pos) + + # too many characters + with pytest.raises(ValueError, match=msg): + lib.maybe_convert_numeric( + np.array(["foo_" + infinity], dtype=object), + na_values, + maybe_int, + ) def test_maybe_convert_numeric_post_floatify_nan(self, coerce): # see gh-13314 From d8a2634b2f3299acd9ffcdf99687c1317b5ea3bb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 18 Sep 2019 15:54:51 -0700 Subject: [PATCH 2/4] CLN: specific exceptions for maybe_convert_numeric --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 0e30e337187f7..5348b1b070225 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -796,7 +796,7 @@ def maybe_convert_objects(values: np.ndarray, convert_numeric: bool = True): new_values = lib.maybe_convert_numeric( values, set(), coerce_numeric=True ) - except Exception: + except (ValueError, TypeError): pass else: # if we are all nans then leave me alone From 0215d7cb15cb055f91b55f0f57cd196106ea7a48 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 18 Sep 2019 16:08:27 -0700 Subject: [PATCH 3/4] blackify --- pandas/tests/dtypes/test_inference.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 81960bca6235e..cfa6304909bb7 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -380,7 +380,9 @@ def test_isinf_scalar(self): assert not libmissing.isneginf_scalar("a") @pytest.mark.parametrize("maybe_int", [True, False]) - @pytest.mark.parametrize("infinity", ["inf", "inF", "iNf", "Inf", "iNF", "InF", "INf", "INF"]) + @pytest.mark.parametrize( + "infinity", ["inf", "inF", "iNf", "Inf", "iNF", "InF", "INf", "INF"] + ) def test_maybe_convert_numeric_infinities(self, infinity, maybe_int): # see gh-13274 na_values = {"", "NULL", "nan"} @@ -413,9 +415,7 @@ def test_maybe_convert_numeric_infinities(self, infinity, maybe_int): # too many characters with pytest.raises(ValueError, match=msg): lib.maybe_convert_numeric( - np.array(["foo_" + infinity], dtype=object), - na_values, - maybe_int, + np.array(["foo_" + infinity], dtype=object), na_values, maybe_int ) def test_maybe_convert_numeric_post_floatify_nan(self, coerce): From 2b0e545eed48bfd290497a4d02eee08f9d935049 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 18 Sep 2019 17:31:46 -0700 Subject: [PATCH 4/4] blackify --- pandas/core/dtypes/cast.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 5348b1b070225..e31918c21c2ac 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -955,6 +955,7 @@ def try_datetime(v): # then these stay as object dtype, xref GH19671 from pandas._libs.tslibs import conversion from pandas import DatetimeIndex + try: values, tz = conversion.datetime_to_datetime64(v)