File tree 4 files changed +21
-1
lines changed
4 files changed +21
-1
lines changed Original file line number Diff line number Diff line change @@ -858,6 +858,7 @@ Reshaping
858
858
- Bug in :meth:`DataFrame.stack` not preserving ``CategoricalDtype`` in a ``MultiIndex`` (:issue:`36991`)
859
859
- Bug in :func:`to_datetime` raising an error when the input sequence contains unhashable items (:issue:`39756`)
860
860
- Bug in :meth:`Series.explode` preserving the index when ``ignore_index`` was ``True`` and values were scalars (:issue:`40487`)
861
+ - Bug in :func:`to_datetime` raising ``ValueError`` when a :class:`Series` contains both ``None`` and ``NaT`` and has more than 50 elements (:issue:`39882`)
861
862
862
863
Sparse
863
864
^^^^^^
Original file line number Diff line number Diff line change 84
84
Scalar = Union [int , float , str ]
85
85
DatetimeScalar = TypeVar ("DatetimeScalar" , Scalar , datetime )
86
86
DatetimeScalarOrArrayConvertible = Union [DatetimeScalar , ArrayConvertible ]
87
+ start_caching_at = 50
87
88
88
89
89
90
# ---------------------------------------------------------------------
@@ -130,7 +131,7 @@ def should_cache(
130
131
# default realization
131
132
if check_count is None :
132
133
# in this case, the gain from caching is negligible
133
- if len (arg ) <= 50 :
134
+ if len (arg ) <= start_caching_at :
134
135
return False
135
136
136
137
if len (arg ) <= 5000 :
@@ -193,6 +194,9 @@ def _maybe_cache(
193
194
if len (unique_dates ) < len (arg ):
194
195
cache_dates = convert_listlike (unique_dates , format )
195
196
cache_array = Series (cache_dates , index = unique_dates )
197
+ if not cache_array .is_unique :
198
+ # GH#39882 in case of None and NaT we get duplicates
199
+ cache_array = cache_array .drop_duplicates ()
196
200
return cache_array
197
201
198
202
Original file line number Diff line number Diff line change 13
13
# for most cases), and the specific cases where the result deviates from
14
14
# this default. Those overrides are defined as a dict with (keyword, val) as
15
15
# dictionary key. In case of multiple items, the last override takes precedence.
16
+
16
17
test_cases = [
17
18
(
18
19
# data
Original file line number Diff line number Diff line change 43
43
import pandas ._testing as tm
44
44
from pandas .core .arrays import DatetimeArray
45
45
from pandas .core .tools import datetimes as tools
46
+ from pandas .core .tools .datetimes import start_caching_at
46
47
47
48
48
49
class TestTimeConversionFormats :
@@ -956,6 +957,19 @@ def test_to_datetime_cache_scalar(self):
956
957
expected = Timestamp ("20130101 00:00:00" )
957
958
assert result == expected
958
959
960
+ def test_convert_object_to_datetime_with_cache (self ):
961
+ # GH#39882
962
+ ser = Series (
963
+ [None ] + [NaT ] * start_caching_at + [Timestamp ("2012-07-26" )],
964
+ dtype = "object" ,
965
+ )
966
+ result = to_datetime (ser , errors = "coerce" )
967
+ expected = Series (
968
+ [NaT ] * (start_caching_at + 1 ) + [Timestamp ("2012-07-26" )],
969
+ dtype = "datetime64[ns]" ,
970
+ )
971
+ tm .assert_series_equal (result , expected )
972
+
959
973
@pytest .mark .parametrize (
960
974
"date, format" ,
961
975
[
You can’t perform that action at this time.
0 commit comments