diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 539649df05046..c288a008777cf 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -701,6 +701,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.strftime` and :meth:`Series.dt.strftime` where ``NaT`` was converted to the string ``'NaT'`` instead of ``np.nan`` (:issue:`29578`) - Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`) - Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`) +- Bug in :func:`pandas.to_datetime` failing for ``deque`` objects when using ``cache=True`` (the default) (:issue:`29403`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index c3edf2d5d853f..8fa4b500b8c1e 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1,6 +1,7 @@ from collections import abc from datetime import datetime, time from functools import partial +from itertools import islice from typing import Optional, TypeVar, Union import numpy as np @@ -111,7 +112,7 @@ def should_cache( assert 0 < unique_share < 1, "unique_share must be in next bounds: (0; 1)" - unique_elements = unique(arg[:check_count]) + unique_elements = set(islice(arg, check_count)) if len(unique_elements) > check_count * unique_share: do_caching = False return do_caching diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index d9dd049583cc4..08c14c36a195e 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1,6 +1,7 @@ """ test to_datetime """ import calendar +from collections import deque from datetime import datetime, time import locale @@ -861,7 +862,7 @@ def test_datetime_invalid_index(self, values, format, infer): @pytest.mark.parametrize("utc", [True, None]) @pytest.mark.parametrize("format", 
["%Y%m%d %H:%M:%S", None]) - @pytest.mark.parametrize("constructor", [list, tuple, np.array, pd.Index]) + @pytest.mark.parametrize("constructor", [list, tuple, np.array, pd.Index, deque]) def test_to_datetime_cache(self, utc, format, constructor): date = "20130101 00:00:00" test_dates = [date] * 10 ** 5 @@ -872,6 +873,24 @@ def test_to_datetime_cache(self, utc, format, constructor): tm.assert_index_equal(result, expected) + @pytest.mark.parametrize( + "listlike", + [ + (deque([pd.Timestamp("2010-06-02 09:30:00")] * 51)), + ([pd.Timestamp("2010-06-02 09:30:00")] * 51), + (tuple([pd.Timestamp("2010-06-02 09:30:00")] * 51)), + ], + ) + def test_no_slicing_errors_in_should_cache(self, listlike): + # GH 29403 + assert tools.should_cache(listlike) is True + + def test_to_datetime_from_deque(self): + # GH 29403 + result = pd.to_datetime(deque([pd.Timestamp("2010-06-02 09:30:00")] * 51)) + expected = pd.to_datetime([pd.Timestamp("2010-06-02 09:30:00")] * 51) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("utc", [True, None]) @pytest.mark.parametrize("format", ["%Y%m%d %H:%M:%S", None]) def test_to_datetime_cache_series(self, utc, format):