From cd2038174ab7050b63f07160b16a6610dab64e24 Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 12 Mar 2021 23:19:38 -0800 Subject: [PATCH 1/3] BUG: Do not attempt to cache unhashable values in to_datetime (#39756) --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/tools/datetimes.py | 6 +++++- pandas/tests/tools/test_to_datetime.py | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 56a5412d4ecfc..5bb39bc75d6ed 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -592,6 +592,7 @@ Reshaping - Bug in :meth:`DataFrame.pivot_table` returning a ``MultiIndex`` for a single value when operating on and empty ``DataFrame`` (:issue:`13483`) - Allow :class:`Index` to be passed to the :func:`numpy.all` function (:issue:`40180`) - Bug in :meth:`DataFrame.stack` not preserving ``CategoricalDtype`` in a ``MultiIndex`` (:issue:`36991`) +- Bug in :func:`to_datetime` raising error when input sequence contains unhashable items (:issue:`39756`) Sparse ^^^^^^ diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 9822356d11d7c..67e7792b10330 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -147,7 +147,11 @@ def should_cache( assert 0 < unique_share < 1, "unique_share must be in next bounds: (0; 1)" - unique_elements = set(islice(arg, check_count)) + try: + # We can't cache if the items are not hashable. + unique_elements = set(islice(arg, check_count)) + except TypeError: + return False if len(unique_elements) > check_count * unique_share: do_caching = False return do_caching diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 999a04a81406e..4004d4a34ede2 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1651,6 +1651,12 @@ def test_to_datetime_unprocessable_input(self, cache): with pytest.raises(TypeError, match=msg): to_datetime([1, "1"], errors="raise", cache=cache) + @pytest.mark.parametrize("cache", [True, False]) + def test_to_datetime_unhashable_input(self, cache): + series = Series([['a']]*100) + result = to_datetime(series, errors='ignore', cache=cache) + tm.assert_series_equal(series, result) + def test_to_datetime_other_datetime64_units(self): # 5/25/2012 scalar = np.int64(1337904000000000).view("M8[us]") From 007dfb0e8e6fc3ad3de8dbc1c12d590faa82ab78 Mon Sep 17 00:00:00 2001 From: Michael Date: Fri, 12 Mar 2021 23:38:10 -0800 Subject: [PATCH 2/3] Fix PEP8 issues --- pandas/tests/tools/test_to_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 4004d4a34ede2..91690cfea6c71 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1653,7 +1653,7 @@ def test_to_datetime_unprocessable_input(self, cache): @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_unhashable_input(self, cache): - series = Series([['a']]*100) + series = Series([['a']] * 100) result = to_datetime(series, errors='ignore', cache=cache) tm.assert_series_equal(series, result) From ebbaa5d1614ab2c265ebf3096ba73189cd209b77 Mon Sep 17 00:00:00 2001 From: Michael Date: Sat, 13 Mar 2021 16:53:01 -0800 Subject: [PATCH 3/3] Reformat --- pandas/tests/tools/test_to_datetime.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 91690cfea6c71..91f6c100419b6 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1653,8 +1653,8 @@ def test_to_datetime_unprocessable_input(self, cache): @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_unhashable_input(self, cache): - series = Series([['a']] * 100) - result = to_datetime(series, errors='ignore', cache=cache) + series = Series([["a"]] * 100) + result = to_datetime(series, errors="ignore", cache=cache) tm.assert_series_equal(series, result) def test_to_datetime_other_datetime64_units(self):