From 11eddeb51775de49a2bc291d1bc9548cdf17b423 Mon Sep 17 00:00:00 2001 From: Nathalie Rud Date: Fri, 31 May 2019 02:36:38 +0100 Subject: [PATCH] BUG: fix TypeError for invalid integer dates %Y%m%d with errors='ignore' (GH 26583) array_strptime raised a TypeError when trying to slice a 'too long' integer for the given format %Y%m%d (for example 2121010101). After parsing the date from the first 8 characters, it tried to report the remaining characters in the ValueError message by slicing the integer itself, which in turn raised the TypeError. The value converted to a string is now used to build the slice for that ValueError message. In the case of 20209911, it parsed the leading digits as datetime(2020, 9, 9) and had 11 left unparsed. --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/_libs/tslibs/strptime.pyx | 6 +++--- pandas/tests/indexes/datetimes/test_tools.py | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 1619ba1a45739..5d30cbe9e2a15 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -529,6 +529,7 @@ Datetimelike - Bug in :func:`to_datetime` which does not replace the invalid argument with ``NaT`` when error is set to coerce (:issue:`26122`) - Bug in adding :class:`DateOffset` with nonzero month to :class:`DatetimeIndex` would raise ``ValueError`` (:issue:`26258`) - Bug in :func:`to_datetime` which raises unhandled ``OverflowError`` when called with mix of invalid dates and ``NaN`` values with ``format='%Y%m%d'`` and ``error='coerce'`` (:issue:`25512`) +- Bug in :func:`to_datetime` which raises ``TypeError`` for ``format='%Y%m%d'`` when called for invalid integer dates with length >= 6 digits with ``errors='ignore'`` (:issue:`26583`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index af3d3fa646a12..d93858cff5e05 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -140,13 +140,13 @@ def 
array_strptime(object[:] values, object fmt, iresult[i] = NPY_NAT continue raise ValueError("time data %r does not match " - "format %r (match)" % (values[i], fmt)) + "format %r (match)" % (val, fmt)) if len(val) != found.end(): if is_coerce: iresult[i] = NPY_NAT continue raise ValueError("unconverted data remains: %s" % - values[i][found.end():]) + val[found.end():]) # search else: @@ -156,7 +156,7 @@ def array_strptime(object[:] values, object fmt, iresult[i] = NPY_NAT continue raise ValueError("time data %r does not match format " - "%r (search)" % (values[i], fmt)) + "%r (search)" % (val, fmt)) iso_year = -1 year = 1900 diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index c507c31ee54dd..ea33e563b31be 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -133,6 +133,25 @@ def test_to_datetime_format_integer(self, cache): result = to_datetime(s, format='%Y%m', cache=cache) assert_series_equal(result, expected) + @pytest.mark.parametrize('int_date, expected', [ + # valid date, length == 8 + [20121030, datetime(2012, 10, 30)], + # short valid date, length == 6 + [199934, datetime(1999, 3, 4)], + # long integer date partially parsed to datetime(2012,1,1), length > 8 + [2012010101, 2012010101], + # invalid date partially parsed to datetime(2012,9,9), length == 8 + [20129930, 20129930], + # short integer date partially parsed to datetime(2012,9,9), length < 8 + [2012993, 2012993], + # short invalid date, length == 4 + [2121, 2121]]) + def test_int_to_datetime_format_YYYYMMDD_typeerror(self, int_date, + expected): + # GH 26583 + result = to_datetime(int_date, format='%Y%m%d', errors='ignore') + assert result == expected + @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_format_microsecond(self, cache):