diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 1619ba1a45739..5d30cbe9e2a15 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -529,6 +529,7 @@ Datetimelike - Bug in :func:`to_datetime` which does not replace the invalid argument with ``NaT`` when error is set to coerce (:issue:`26122`) - Bug in adding :class:`DateOffset` with nonzero month to :class:`DatetimeIndex` would raise ``ValueError`` (:issue:`26258`) - Bug in :func:`to_datetime` which raises unhandled ``OverflowError`` when called with mix of invalid dates and ``NaN`` values with ``format='%Y%m%d'`` and ``error='coerce'`` (:issue:`25512`) +- Bug in :func:`to_datetime` which raises ``TypeError`` for ``format='%Y%m%d'`` when called for invalid integer dates with length >= 6 digits with ``errors='ignore'`` (:issue:`26583`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index af3d3fa646a12..d93858cff5e05 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -140,13 +140,13 @@ def array_strptime(object[:] values, object fmt, iresult[i] = NPY_NAT continue raise ValueError("time data %r does not match " - "format %r (match)" % (values[i], fmt)) + "format %r (match)" % (val, fmt)) if len(val) != found.end(): if is_coerce: iresult[i] = NPY_NAT continue raise ValueError("unconverted data remains: %s" % - values[i][found.end():]) + val[found.end():]) # search else: @@ -156,7 +156,7 @@ def array_strptime(object[:] values, object fmt, iresult[i] = NPY_NAT continue raise ValueError("time data %r does not match format " - "%r (search)" % (values[i], fmt)) + "%r (search)" % (val, fmt)) iso_year = -1 year = 1900 diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index c507c31ee54dd..ea33e563b31be 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -133,6 +133,25 @@ def 
test_to_datetime_format_integer(self, cache): result = to_datetime(s, format='%Y%m', cache=cache) assert_series_equal(result, expected) + @pytest.mark.parametrize('int_date, expected', [ + # valid date, length == 8 + [20121030, datetime(2012, 10, 30)], + # short valid date, length == 6 + [199934, datetime(1999, 3, 4)], + # long integer date partially parsed to datetime(2012,1,1), length > 8 + [2012010101, 2012010101], + # invalid date partially parsed to datetime(2012,9,9), length == 8 + [20129930, 20129930], + # short integer date partially parsed to datetime(2012,9,9), length < 8 + [2012993, 2012993], + # short invalid date, length == 4 + [2121, 2121]]) + def test_int_to_datetime_format_YYYYMMDD_typeerror(self, int_date, + expected): + # GH 26583 + result = to_datetime(int_date, format='%Y%m%d', errors='ignore') + assert result == expected + @pytest.mark.parametrize('cache', [True, False]) def test_to_datetime_format_microsecond(self, cache):