Skip to content

Commit ba43224

Browse files
authored
BUG: to_datetime with mixed-string-and-numeric (#55780)
* BUG: to_datetime with mixed-string-and-numeric
* GH ref
* update astype test
1 parent f3b9309 commit ba43224

File tree

8 files changed

+60
-19
lines changed

8 files changed

+60
-19
lines changed

doc/source/whatsnew/v2.2.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,9 @@ Categorical
321321

322322
Datetimelike
323323
^^^^^^^^^^^^
324+
- Bug in :class:`DatetimeIndex` when passing an object-dtype ndarray of float objects and a ``tz`` incorrectly localizing the result (:issue:`55780`)
324325
- Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`)
326+
- Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing a list of mixed-string-and-numeric types incorrectly raising (:issue:`55780`)
325327
- Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`)
326328
- Bug in :meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` always caching :meth:`Index.is_unique` as ``True`` when first value in index is ``NaT`` (:issue:`55755`)
327329
- Bug in :meth:`Index.view` to a datetime64 dtype with non-supported resolution incorrectly raising (:issue:`55710`)

pandas/_libs/tslib.pyx

+6-6
Original file line numberDiff line numberDiff line change
@@ -700,15 +700,15 @@ def array_to_datetime_with_tz(ndarray values, tzinfo tz, NPY_DATETIMEUNIT creso)
700700
ival = NPY_NAT
701701

702702
else:
703-
ts = Timestamp(item)
703+
if PyDateTime_Check(item) and item.tzinfo is not None:
704+
# We can't call Timestamp constructor with a tz arg, have to
705+
# do 2-step
706+
ts = Timestamp(item).tz_convert(tz)
707+
else:
708+
ts = Timestamp(item, tz=tz)
704709
if ts is NaT:
705710
ival = NPY_NAT
706711
else:
707-
if ts.tzinfo is not None:
708-
ts = ts.tz_convert(tz)
709-
else:
710-
# datetime64, tznaive pydatetime, int, float
711-
ts = ts.tz_localize(tz)
712712
ts = (<_Timestamp>ts)._as_creso(creso)
713713
ival = ts._value
714714

pandas/core/arrays/datetimelike.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
)
8282
from pandas.util._exceptions import find_stack_level
8383

84+
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
8485
from pandas.core.dtypes.common import (
8586
is_all_strings,
8687
is_integer_dtype,
@@ -2358,7 +2359,8 @@ def ensure_arraylike_for_datetimelike(data, copy: bool, cls_name: str):
23582359
if not isinstance(data, (list, tuple)) and np.ndim(data) == 0:
23592360
# i.e. generator
23602361
data = list(data)
2361-
data = np.asarray(data)
2362+
2363+
data = construct_1d_object_array_from_listlike(data)
23622364
copy = False
23632365
elif isinstance(data, ABCMultiIndex):
23642366
raise TypeError(f"Cannot create a {cls_name} from a MultiIndex.")

pandas/tests/dtypes/test_missing.py

+14-6
Original file line numberDiff line numberDiff line change
@@ -418,12 +418,10 @@ def test_array_equivalent(dtype_equal):
418418
assert not array_equivalent(
419419
Index([0, np.nan]), Index([1, np.nan]), dtype_equal=dtype_equal
420420
)
421-
assert array_equivalent(
422-
DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]), dtype_equal=dtype_equal
423-
)
424-
assert not array_equivalent(
425-
DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan]), dtype_equal=dtype_equal
426-
)
421+
422+
423+
@pytest.mark.parametrize("dtype_equal", [True, False])
424+
def test_array_equivalent_tdi(dtype_equal):
427425
assert array_equivalent(
428426
TimedeltaIndex([0, np.nan]),
429427
TimedeltaIndex([0, np.nan]),
@@ -435,6 +433,16 @@ def test_array_equivalent(dtype_equal):
435433
dtype_equal=dtype_equal,
436434
)
437435

436+
437+
@pytest.mark.parametrize("dtype_equal", [True, False])
438+
def test_array_equivalent_dti(dtype_equal):
439+
assert array_equivalent(
440+
DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]), dtype_equal=dtype_equal
441+
)
442+
assert not array_equivalent(
443+
DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan]), dtype_equal=dtype_equal
444+
)
445+
438446
dti1 = DatetimeIndex([0, np.nan], tz="US/Eastern")
439447
dti2 = DatetimeIndex([0, np.nan], tz="CET")
440448
dti3 = DatetimeIndex([1, np.nan], tz="US/Eastern")

pandas/tests/frame/test_constructors.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -3154,9 +3154,9 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls):
31543154
dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls]
31553155

31563156
if cls is np.datetime64:
3157-
msg1 = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]"
3157+
msg1 = "Invalid type for timedelta scalar: <class 'numpy.datetime64'>"
31583158
else:
3159-
msg1 = r"dtype timedelta64\[ns\] cannot be converted to datetime64\[ns\]"
3159+
msg1 = "<class 'numpy.timedelta64'> is not convertible to datetime"
31603160
msg = "|".join(["Cannot cast", msg1])
31613161

31623162
with pytest.raises(TypeError, match=msg):

pandas/tests/indexes/datetimes/test_constructors.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -1054,8 +1054,11 @@ def test_dti_constructor_with_non_nano_dtype(self, tz):
10541054
# to 2 microseconds
10551055
vals = [ts, "2999-01-02 03:04:05.678910", 2500]
10561056
result = DatetimeIndex(vals, dtype=dtype)
1057-
exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]")
1058-
expected = DatetimeIndex(exp_arr, dtype="M8[us]").tz_localize(tz)
1057+
exp_vals = [Timestamp(x, tz=tz).as_unit("us").asm8 for x in vals]
1058+
exp_arr = np.array(exp_vals, dtype="M8[us]")
1059+
expected = DatetimeIndex(exp_arr, dtype="M8[us]")
1060+
if tz is not None:
1061+
expected = expected.tz_localize("UTC").tz_convert(tz)
10591062
tm.assert_index_equal(result, expected)
10601063

10611064
result2 = DatetimeIndex(np.array(vals, dtype=object), dtype=dtype)
@@ -1080,6 +1083,15 @@ def test_dti_constructor_with_non_nano_now_today(self):
10801083
assert diff1 >= pd.Timedelta(0)
10811084
assert diff1 < tolerance
10821085

1086+
def test_dti_constructor_object_float_matches_float_dtype(self):
1087+
# GH#55780
1088+
arr = np.array([0, np.nan], dtype=np.float64)
1089+
arr2 = arr.astype(object)
1090+
1091+
dti1 = DatetimeIndex(arr, tz="CET")
1092+
dti2 = DatetimeIndex(arr2, tz="CET")
1093+
tm.assert_index_equal(dti1, dti2)
1094+
10831095

10841096
class TestTimeSeries:
10851097
def test_dti_constructor_preserve_dti_freq(self):

pandas/tests/series/methods/test_astype.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,11 @@ def test_astype_object_to_dt64_non_nano(self, tz):
120120
ser = Series(vals, dtype=object)
121121
result = ser.astype(dtype)
122122

123-
exp_arr = np.array([ts.asm8, vals[1], 2], dtype="M8[us]")
124-
expected = Series(exp_arr, dtype="M8[us]").dt.tz_localize(tz)
123+
exp_vals = [Timestamp(x, tz=tz).as_unit("us").asm8 for x in vals]
124+
exp_arr = np.array(exp_vals, dtype="M8[us]")
125+
expected = Series(exp_arr, dtype="M8[us]")
126+
if tz is not None:
127+
expected = expected.dt.tz_localize("UTC").dt.tz_convert(tz)
125128
tm.assert_series_equal(result, expected)
126129

127130
def test_astype_mixed_object_to_dt64tz(self):

pandas/tests/tools/test_to_datetime.py

+14
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,20 @@ def test_to_datetime_mixed_datetime_and_string(self):
603603
expected = to_datetime([d1, d2]).tz_convert(timezone(timedelta(minutes=-60)))
604604
tm.assert_index_equal(res, expected)
605605

606+
def test_to_datetime_mixed_string_and_numeric(self):
607+
# GH#55780 np.array(vals) would incorrectly cast the number to str
608+
vals = ["2016-01-01", 0]
609+
expected = DatetimeIndex([Timestamp(x) for x in vals])
610+
result = to_datetime(vals, format="mixed")
611+
result2 = to_datetime(vals[::-1], format="mixed")[::-1]
612+
result3 = DatetimeIndex(vals)
613+
result4 = DatetimeIndex(vals[::-1])[::-1]
614+
615+
tm.assert_index_equal(result, expected)
616+
tm.assert_index_equal(result2, expected)
617+
tm.assert_index_equal(result3, expected)
618+
tm.assert_index_equal(result4, expected)
619+
606620
@pytest.mark.parametrize(
607621
"format", ["%Y-%m-%d", "%Y-%d-%m"], ids=["ISO8601", "non-ISO8601"]
608622
)

0 commit comments

Comments
 (0)