Skip to content

Commit 50bca8e

Browse files
authored
BUG: Fix to_datetime() cache behaviour to not omit duplicated output values (#42261)
1 parent 196b28a commit 50bca8e

File tree

3 files changed

+32
-10
lines changed

3 files changed

+32
-10
lines changed

doc/source/whatsnew/v1.4.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ Categorical
122122

123123
Datetimelike
124124
^^^^^^^^^^^^
125-
-
125+
- Bug in :func:`to_datetime` returning ``pd.NaT`` for inputs that produce duplicated values when ``cache=True`` (:issue:`42259`)
126126
-
127127

128128
Timedelta

pandas/core/tools/datetimes.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,9 @@ def _maybe_cache(
194194
if len(unique_dates) < len(arg):
195195
cache_dates = convert_listlike(unique_dates, format)
196196
cache_array = Series(cache_dates, index=unique_dates)
197-
if not cache_array.is_unique:
198-
# GH#39882 in case of None and NaT we get duplicates
199-
cache_array = cache_array.drop_duplicates()
197+
# GH#39882 and GH#35888 in case of None and NaT we get duplicates
198+
if not cache_array.index.is_unique:
199+
cache_array = cache_array[~cache_array.index.duplicated()]
200200
return cache_array
201201

202202

pandas/tests/tools/test_to_datetime.py

+28-6
Original file line numberDiff line numberDiff line change
@@ -957,18 +957,40 @@ def test_to_datetime_cache_scalar(self):
957957
expected = Timestamp("20130101 00:00:00")
958958
assert result == expected
959959

960-
def test_convert_object_to_datetime_with_cache(self):
960+
@pytest.mark.parametrize(
961+
"datetimelikes,expected_values",
962+
(
963+
(
964+
(None, np.nan) + (NaT,) * start_caching_at,
965+
(NaT,) * (start_caching_at + 2),
966+
),
967+
(
968+
(None, Timestamp("2012-07-26")) + (NaT,) * start_caching_at,
969+
(NaT, Timestamp("2012-07-26")) + (NaT,) * start_caching_at,
970+
),
971+
(
972+
(None,)
973+
+ (NaT,) * start_caching_at
974+
+ ("2012 July 26", Timestamp("2012-07-26")),
975+
(NaT,) * (start_caching_at + 1)
976+
+ (Timestamp("2012-07-26"), Timestamp("2012-07-26")),
977+
),
978+
),
979+
)
980+
def test_convert_object_to_datetime_with_cache(
981+
self, datetimelikes, expected_values
982+
):
961983
# GH#39882
962984
ser = Series(
963-
[None] + [NaT] * start_caching_at + [Timestamp("2012-07-26")],
985+
datetimelikes,
964986
dtype="object",
965987
)
966-
result = to_datetime(ser, errors="coerce")
967-
expected = Series(
968-
[NaT] * (start_caching_at + 1) + [Timestamp("2012-07-26")],
988+
result_series = to_datetime(ser, errors="coerce")
989+
expected_series = Series(
990+
expected_values,
969991
dtype="datetime64[ns]",
970992
)
971-
tm.assert_series_equal(result, expected)
993+
tm.assert_series_equal(result_series, expected_series)
972994

973995
@pytest.mark.parametrize(
974996
"date, format",

0 commit comments

Comments
 (0)