Skip to content

Commit b0a0c68

Browse files
authored
BUG: to_datetime with empty string (#55771)
1 parent f04da3c commit b0a0c68

File tree

2 files changed

+24
-15
lines changed

2 files changed

+24
-15
lines changed

pandas/_libs/tslib.pyx

+15 -13
Original file line number | Diff line number | Diff line change
@@ -519,7 +519,12 @@ cpdef array_to_datetime(
519519
iresult[i] = _ts.value
520520

521521
tz = _ts.tzinfo
522-
if tz is not None:
522+
if _ts.value == NPY_NAT:
523+
# e.g. "NaT" string or empty string, we do not consider
524+
# this as either tzaware or tznaive. See
525+
# test_to_datetime_with_empty_str_utc_false_format_mixed
526+
pass
527+
elif tz is not None:
523528
# dateutil timezone objects cannot be hashed, so
524529
# store the UTC offsets in seconds instead
525530
nsecs = tz.utcoffset(None).total_seconds()
@@ -610,7 +615,6 @@ cdef _array_to_datetime_object(
610615
# 1) NaT or NaT-like values
611616
# 2) datetime strings, which we return as datetime.datetime
612617
# 3) special strings - "now" & "today"
613-
unique_timezones = set()
614618
for i in range(n):
615619
# Analogous to: val = values[i]
616620
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
@@ -640,7 +644,6 @@ cdef _array_to_datetime_object(
640644
tzinfo=tsobj.tzinfo,
641645
fold=tsobj.fold,
642646
)
643-
unique_timezones.add(tsobj.tzinfo)
644647

645648
except (ValueError, OverflowError) as ex:
646649
ex.args = (f"{ex}, at position {i}", )
@@ -658,16 +661,15 @@ cdef _array_to_datetime_object(
658661

659662
cnp.PyArray_MultiIter_NEXT(mi)
660663

661-
if len(unique_timezones) > 1:
662-
warnings.warn(
663-
"In a future version of pandas, parsing datetimes with mixed time "
664-
"zones will raise an error unless `utc=True`. "
665-
"Please specify `utc=True` to opt in to the new behaviour "
666-
"and silence this warning. To create a `Series` with mixed offsets and "
667-
"`object` dtype, please use `apply` and `datetime.datetime.strptime`",
668-
FutureWarning,
669-
stacklevel=find_stack_level(),
670-
)
664+
warnings.warn(
665+
"In a future version of pandas, parsing datetimes with mixed time "
666+
"zones will raise an error unless `utc=True`. "
667+
"Please specify `utc=True` to opt in to the new behaviour "
668+
"and silence this warning. To create a `Series` with mixed offsets and "
669+
"`object` dtype, please use `apply` and `datetime.datetime.strptime`",
670+
FutureWarning,
671+
stacklevel=find_stack_level(),
672+
)
671673
return oresult_nd, None
672674

673675

pandas/tests/tools/test_to_datetime.py

+9 -2
Original file line number | Diff line number | Diff line change
@@ -3675,10 +3675,17 @@ def test_from_numeric_arrow_dtype(any_numeric_ea_dtype):
36753675

36763676
def test_to_datetime_with_empty_str_utc_false_format_mixed():
36773677
# GH 50887
3678-
result = to_datetime(["2020-01-01 00:00+00:00", ""], format="mixed")
3679-
expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype=object)
3678+
vals = ["2020-01-01 00:00+00:00", ""]
3679+
result = to_datetime(vals, format="mixed")
3680+
expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype="M8[ns, UTC]")
36803681
tm.assert_index_equal(result, expected)
36813682

3683+
# Check that a couple of other similar paths work the same way
3684+
alt = to_datetime(vals)
3685+
tm.assert_index_equal(alt, expected)
3686+
alt2 = DatetimeIndex(vals)
3687+
tm.assert_index_equal(alt2, expected)
3688+
36823689

36833690
def test_to_datetime_with_empty_str_utc_false_offsets_and_format_mixed():
36843691
# GH 50887

0 commit comments

Comments
 (0)