diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index c441244b4415d..856fbd1237a03 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -266,7 +266,7 @@ Datetimelike ^^^^^^^^^^^^ - Bug in :func:`to_datetime` which would raise an (incorrect) ``ValueError`` when called with a date far into the future and the ``format`` argument specified instead of raising ``OutOfBoundsDatetime`` (:issue:`23830`) -- +- Bug in :func:`to_datetime` which would raise ``InvalidIndexError: Reindexing only valid with uniquely valued Index objects`` when called with ``cache=True`` and an ``arg`` containing at least two different elements from the set ``{None, numpy.nan, pandas.NaT}`` (:issue:`22305`) - - diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 66d563a7c6f85..1ad39e7ad357a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -52,9 +52,10 @@ def _maybe_cache(arg, format, cache, convert_listlike): if cache: # Perform a quicker unique check from pandas import Index - if not Index(arg).is_unique: - unique_dates = algorithms.unique(arg) - cache_dates = convert_listlike(unique_dates, True, format) + unique_dates = Index(arg).unique() + if len(unique_dates) < len(arg): + cache_dates = convert_listlike(unique_dates.to_numpy(), + True, format) cache_array = Series(cache_dates, index=unique_dates) return cache_array diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index eaf689cfa1c21..a592ef941484e 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1630,6 +1630,15 @@ def test_parsers(self, date_str, expected, cache): yearfirst=yearfirst) assert result7 == expected + @pytest.mark.parametrize('cache', [True, False]) + def test_na_values_with_cache(self, cache, unique_nulls_fixture, + unique_nulls_fixture2): + # GH22305 + expected = Index([NaT, NaT], 
dtype='datetime64[ns]') + result = to_datetime([unique_nulls_fixture, unique_nulls_fixture2], + cache=cache) + tm.assert_index_equal(result, expected) + def test_parsers_nat(self): # Test that each of several string-accepting methods return pd.NaT result1, _, _ = parsing.parse_time_string('NaT')