Skip to content

Commit e01b0fc

Browse files
committed
fixed problems found by review
1 parent 606ae7a commit e01b0fc

File tree

1 file changed

+20
-4
lines changed

1 file changed

+20
-4
lines changed

pandas/core/tools/datetimes.py

+20-4
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from pandas._libs.tslibs.strptime import array_strptime
1313
from pandas.util._decorators import deprecate_kwarg
1414

15-
from pandas.core.algorithms import unique
1615
from pandas.core.dtypes.common import (
1716
ensure_object, is_datetime64_dtype, is_datetime64_ns_dtype,
1817
is_datetime64tz_dtype, is_float, is_integer, is_integer_dtype,
@@ -23,6 +22,14 @@
2322

2423
from pandas._typing import ArrayLike
2524
from pandas.core import algorithms
25+
from pandas.core.algorithms import unique
26+
27+
# ---------------------------------------------------------------------
28+
# types used in annotations
29+
30+
ArrayConvertible = Union[list, tuple, ArrayLike, ABCSeries]
31+
32+
# ---------------------------------------------------------------------
2633

2734
# ---------------------------------------------------------------------
2835
# types used in annotations
@@ -43,7 +50,8 @@ def _guess_datetime_format_for_array(arr, **kwargs):
4350
return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs)
4451

4552

46-
def should_cache(arg, unique_share=0.7, check_count=None):
53+
def should_cache(arg: ArrayConvertible, unique_share: float = 0.7,
54+
check_count: Optional[int] = None) -> bool:
4755
"""
4856
Decides whether to do caching.
4957
@@ -53,14 +61,22 @@ def should_cache(arg, unique_share=0.7, check_count=None):
5361
Parameters
5462
----------
5563
arg: listlike, tuple, 1-d array, Series
56-
unique_share: float or None
64+
unique_share: float, default 0.7
5765
0 < unique_share < 1
58-
check_count: int or None
66+
check_count: int, optional
5967
0 <= check_count <= len(arg)
6068
6169
Returns
6270
-------
6371
do_caching: bool
72+
73+
Notes
74+
-----
75+
By default, for a sequence of fewer than 50 items, we don't do
76+
caching; for a sequence of fewer than 5000 items, we take ten percent of
77+
all elements to check the share of unique values; if the sequence has more
78+
than 5000 items, we check only the first 500 elements.
79+
All constants were chosen empirically.
6480
"""
6581
do_caching = True
6682

0 commit comments

Comments
 (0)