Skip to content

Commit 3a0ac42

Browse files
committed
BUG: fix datetimes.should_cache() error for deque (GH 29403)
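collections.deque does not support slice indexing, so arg[:check_count] raised an error when arg was a deque. itertools.islice() can take the first check_count elements of a deque, and it also works (and is efficient) for other collections. Therefore unique(arg[:check_count]) was replaced with set(islice(arg, check_count)).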
1 parent 35029d2 commit 3a0ac42

File tree

2 files changed

+14
-1
lines changed

2 files changed

+14
-1
lines changed

pandas/core/tools/datetimes.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from collections import abc
22
from datetime import datetime, time
33
from functools import partial
4+
from itertools import islice
45
from typing import Optional, TypeVar, Union
56

67
import numpy as np
@@ -118,7 +119,7 @@ def should_cache(
118119

119120
assert 0 < unique_share < 1, "unique_share must be in next bounds: (0; 1)"
120121

121-
unique_elements = unique(arg[:check_count])
122+
unique_elements = set(islice(arg, check_count))
122123
if len(unique_elements) > check_count * unique_share:
123124
do_caching = False
124125
return do_caching

pandas/tests/indexes/datetimes/test_tools.py

+12
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import calendar
44
from datetime import datetime, time
55
import locale
6+
from collections import deque
67

78
from dateutil.parser import parse
89
from dateutil.tz.tz import tzoffset
@@ -2275,6 +2276,17 @@ def test_should_cache(listlike, do_caching):
22752276
)
22762277

22772278

2279+
@pytest.mark.parametrize(
2280+
"listlike",
2281+
[(deque([pd.Timestamp("2010-06-02 09:30:00")] * 51)),
2282+
([pd.Timestamp("2010-06-02 09:30:00")] * 51),
2283+
(tuple([pd.Timestamp("2010-06-02 09:30:00")] * 51))],
2284+
)
2285+
def test_no_slicing_errors_in_should_cache(listlike):
2286+
# GH 29403
2287+
assert tools.should_cache(listlike) is True
2288+
2289+
22782290
@pytest.mark.parametrize(
22792291
"unique_share,check_count, err_message",
22802292
[

0 commit comments

Comments
 (0)