Skip to content

Commit 0277a9c

Browse files
authored
REF: merge datetime_to_datetime64 into array_to_datetime (#47018)
1 parent 61efac7 commit 0277a9c

File tree

6 files changed

+58
-136
lines changed

6 files changed

+58
-136
lines changed

pandas/_libs/tslib.pyx

+23-5
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ from pandas._libs.tslibs.nattype cimport (
5959
c_nat_strings as nat_strings,
6060
)
6161
from pandas._libs.tslibs.timestamps cimport _Timestamp
62+
from pandas._libs.tslibs.timezones cimport tz_compare
6263

6364
from pandas._libs.tslibs import (
6465
Resolution,
@@ -447,6 +448,7 @@ cpdef array_to_datetime(
447448
bint string_to_dts_failed
448449
datetime py_dt
449450
tzinfo tz_out = None
451+
bint found_tz = False, found_naive = False
450452

451453
# specify error conditions
452454
assert is_raise or is_ignore or is_coerce
@@ -465,18 +467,34 @@ cpdef array_to_datetime(
465467
elif PyDateTime_Check(val):
466468
seen_datetime = True
467469
if val.tzinfo is not None:
470+
found_tz = True
468471
if utc_convert:
469472
_ts = convert_datetime_to_tsobject(val, None)
470473
iresult[i] = _ts.value
471-
else:
474+
elif found_naive:
472475
raise ValueError('Tz-aware datetime.datetime '
473476
'cannot be converted to '
474477
'datetime64 unless utc=True')
475-
elif isinstance(val, _Timestamp):
476-
iresult[i] = val.value
478+
elif tz_out is not None and not tz_compare(tz_out, val.tzinfo):
479+
raise ValueError('Tz-aware datetime.datetime '
480+
'cannot be converted to '
481+
'datetime64 unless utc=True')
482+
else:
483+
found_tz = True
484+
tz_out = val.tzinfo
485+
_ts = convert_datetime_to_tsobject(val, None)
486+
iresult[i] = _ts.value
487+
477488
else:
478-
iresult[i] = pydatetime_to_dt64(val, &dts)
479-
check_dts_bounds(&dts)
489+
found_naive = True
490+
if found_tz:
491+
raise ValueError('Cannot mix tz-aware with '
492+
'tz-naive values')
493+
if isinstance(val, _Timestamp):
494+
iresult[i] = val.value
495+
else:
496+
iresult[i] = pydatetime_to_dt64(val, &dts)
497+
check_dts_bounds(&dts)
480498

481499
elif PyDate_Check(val):
482500
seen_datetime = True

pandas/_libs/tslibs/conversion.pyi

-3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,4 @@ def ensure_timedelta64ns(
2323
arr: np.ndarray, # np.ndarray[timedelta64[ANY]]
2424
copy: bool = ...,
2525
) -> np.ndarray: ... # np.ndarray[timedelta64ns]
26-
def datetime_to_datetime64(
27-
values: npt.NDArray[np.object_],
28-
) -> tuple[np.ndarray, tzinfo | None]: ... # (np.ndarray[dt64ns], _)
2926
def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...

pandas/_libs/tslibs/conversion.pyx

-74
Original file line numberDiff line numberDiff line change
@@ -264,80 +264,6 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool = True):
264264
return dt64_result.view(TD64NS_DTYPE)
265265

266266

267-
# ----------------------------------------------------------------------
268-
269-
270-
@cython.boundscheck(False)
271-
@cython.wraparound(False)
272-
def datetime_to_datetime64(ndarray values):
273-
# ndarray[object], but can't declare object without ndim
274-
"""
275-
Convert ndarray of datetime-like objects to int64 array representing
276-
nanosecond timestamps.
277-
278-
Parameters
279-
----------
280-
values : ndarray[object]
281-
282-
Returns
283-
-------
284-
result : ndarray[datetime64ns]
285-
inferred_tz : tzinfo or None
286-
"""
287-
cdef:
288-
Py_ssize_t i, n = values.size
289-
object val
290-
int64_t ival
291-
ndarray iresult # int64_t, but can't declare that without specifying ndim
292-
npy_datetimestruct dts
293-
_TSObject _ts
294-
bint found_naive = False
295-
tzinfo inferred_tz = None
296-
297-
cnp.broadcast mi
298-
299-
result = np.empty((<object>values).shape, dtype='M8[ns]')
300-
iresult = result.view('i8')
301-
302-
mi = cnp.PyArray_MultiIterNew2(iresult, values)
303-
for i in range(n):
304-
# Analogous to: val = values[i]
305-
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
306-
307-
if checknull_with_nat(val):
308-
ival = NPY_NAT
309-
elif PyDateTime_Check(val):
310-
if val.tzinfo is not None:
311-
if found_naive:
312-
raise ValueError('Cannot mix tz-aware with '
313-
'tz-naive values')
314-
if inferred_tz is not None:
315-
if not tz_compare(val.tzinfo, inferred_tz):
316-
raise ValueError('Array must be all same time zone')
317-
else:
318-
inferred_tz = val.tzinfo
319-
320-
_ts = convert_datetime_to_tsobject(val, None)
321-
ival = _ts.value
322-
check_dts_bounds(&_ts.dts)
323-
else:
324-
found_naive = True
325-
if inferred_tz is not None:
326-
raise ValueError('Cannot mix tz-aware with '
327-
'tz-naive values')
328-
ival = pydatetime_to_dt64(val, &dts)
329-
check_dts_bounds(&dts)
330-
else:
331-
raise TypeError(f'Unrecognized value type: {type(val)}')
332-
333-
# Analogous to: iresult[i] = ival
334-
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
335-
336-
cnp.PyArray_MultiIter_NEXT(mi)
337-
338-
return result, inferred_tz
339-
340-
341267
# ----------------------------------------------------------------------
342268
# _TSObject Conversion
343269

pandas/core/arrays/datetimes.py

-8
Original file line numberDiff line numberDiff line change
@@ -2263,14 +2263,6 @@ def objects_to_datetime64ns(
22632263
allow_mixed=allow_mixed,
22642264
)
22652265
result = result.reshape(data.shape, order=order)
2266-
except ValueError as err:
2267-
try:
2268-
values, tz_parsed = conversion.datetime_to_datetime64(data)
2269-
# If tzaware, these values represent unix timestamps, so we
2270-
# return them as i8 to distinguish from wall times
2271-
return values.view("i8"), tz_parsed
2272-
except (ValueError, TypeError):
2273-
raise err
22742266
except OverflowError as err:
22752267
# Exception is raised when a part of the date is greater than a 32-bit signed int
22762268
raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err

pandas/core/tools/datetimes.py

+26-46
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
OutOfBoundsDatetime,
2525
Timedelta,
2626
Timestamp,
27-
conversion,
2827
iNaT,
2928
nat_strings,
3029
parsing,
@@ -41,6 +40,7 @@
4140
ArrayLike,
4241
DateTimeErrorChoices,
4342
Timezone,
43+
npt,
4444
)
4545
from pandas.util._exceptions import find_stack_level
4646

@@ -467,8 +467,6 @@ def _array_strptime_with_fallback(
467467

468468
try:
469469
result, timezones = array_strptime(arg, fmt, exact=exact, errors=errors)
470-
if "%Z" in fmt or "%z" in fmt:
471-
return _return_parsed_timezone_results(result, timezones, tz, name)
472470
except OutOfBoundsDatetime:
473471
if errors == "raise":
474472
raise
@@ -494,6 +492,9 @@ def _array_strptime_with_fallback(
494492
else:
495493
# Indicates to the caller to fallback to objects_to_datetime64ns
496494
return None
495+
else:
496+
if "%Z" in fmt or "%z" in fmt:
497+
return _return_parsed_timezone_results(result, timezones, tz, name)
497498

498499
return _box_as_indexlike(result, utc=utc, name=name)
499500

@@ -512,38 +513,28 @@ def _to_datetime_with_format(
512513
Try parsing with the given format, returning None on failure.
513514
"""
514515
result = None
515-
try:
516-
# shortcut formatting here
517-
if fmt == "%Y%m%d":
518-
# pass orig_arg as float-dtype may have been converted to
519-
# datetime64[ns]
520-
orig_arg = ensure_object(orig_arg)
521-
try:
522-
# may return None without raising
523-
result = _attempt_YYYYMMDD(orig_arg, errors=errors)
524-
except (ValueError, TypeError, OutOfBoundsDatetime) as err:
525-
raise ValueError(
526-
"cannot convert the input to '%Y%m%d' date format"
527-
) from err
528-
if result is not None:
529-
utc = tz == "utc"
530-
return _box_as_indexlike(result, utc=utc, name=name)
531516

532-
# fallback
533-
res = _array_strptime_with_fallback(
534-
arg, name, tz, fmt, exact, errors, infer_datetime_format
535-
)
536-
return res
537-
538-
except ValueError as err:
539-
# Fallback to try to convert datetime objects if timezone-aware
540-
# datetime objects are found without passing `utc=True`
517+
# shortcut formatting here
518+
if fmt == "%Y%m%d":
519+
# pass orig_arg as float-dtype may have been converted to
520+
# datetime64[ns]
521+
orig_arg = ensure_object(orig_arg)
541522
try:
542-
values, tz = conversion.datetime_to_datetime64(arg)
543-
dta = DatetimeArray(values, dtype=tz_to_dtype(tz))
544-
return DatetimeIndex._simple_new(dta, name=name)
545-
except (ValueError, TypeError):
546-
raise err
523+
# may return None without raising
524+
result = _attempt_YYYYMMDD(orig_arg, errors=errors)
525+
except (ValueError, TypeError, OutOfBoundsDatetime) as err:
526+
raise ValueError(
527+
"cannot convert the input to '%Y%m%d' date format"
528+
) from err
529+
if result is not None:
530+
utc = tz == "utc"
531+
return _box_as_indexlike(result, utc=utc, name=name)
532+
533+
# fallback
534+
res = _array_strptime_with_fallback(
535+
arg, name, tz, fmt, exact, errors, infer_datetime_format
536+
)
537+
return res
547538

548539

549540
def _to_datetime_with_unit(arg, unit, name, tz, errors: str) -> Index:
@@ -1007,17 +998,6 @@ def to_datetime(
1007998
DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'],
1008999
dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)
10091000
1010-
- Finally, mixing timezone-aware strings and :class:`datetime.datetime` always
1011-
raises an error, even if the elements all have the same time offset.
1012-
1013-
>>> from datetime import datetime, timezone, timedelta
1014-
>>> d = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1)))
1015-
>>> pd.to_datetime(["2020-01-01 17:00 -0100", d])
1016-
Traceback (most recent call last):
1017-
...
1018-
ValueError: Tz-aware datetime.datetime cannot be converted to datetime64
1019-
unless utc=True
1020-
10211001
|
10221002
10231003
Setting ``utc=True`` solves most of the above issues:
@@ -1243,7 +1223,7 @@ def coerce(values):
12431223
return values
12441224

12451225

1246-
def _attempt_YYYYMMDD(arg: np.ndarray, errors: str) -> np.ndarray | None:
1226+
def _attempt_YYYYMMDD(arg: npt.NDArray[np.object_], errors: str) -> np.ndarray | None:
12471227
"""
12481228
try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
12491229
arg is passed in as an object dtype, but could really be ints/strings
@@ -1257,7 +1237,7 @@ def _attempt_YYYYMMDD(arg: np.ndarray, errors: str) -> np.ndarray | None:
12571237

12581238
def calc(carg):
12591239
# calculate the actual result
1260-
carg = carg.astype(object)
1240+
carg = carg.astype(object, copy=False)
12611241
parsed = parsing.try_parse_year_month_day(
12621242
carg / 10000, carg / 100 % 100, carg % 100
12631243
)

pandas/tests/tools/test_to_datetime.py

+9
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from datetime import (
66
datetime,
77
timedelta,
8+
timezone,
89
)
910
from decimal import Decimal
1011
import locale
@@ -455,6 +456,14 @@ def test_to_datetime_parse_timezone_keeps_name(self):
455456

456457

457458
class TestToDatetime:
459+
def test_to_datetime_mixed_datetime_and_string(self):
460+
# GH#47018 adapted old doctest with new behavior
461+
d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1)))
462+
d2 = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1)))
463+
res = to_datetime(["2020-01-01 17:00 -0100", d2])
464+
expected = to_datetime([d1, d2]).tz_convert(pytz.FixedOffset(-60))
465+
tm.assert_index_equal(res, expected)
466+
458467
def test_to_datetime_np_str(self):
459468
# GH#32264
460469
value = np.str_("2019-02-04 10:18:46.297000+0000")

0 commit comments

Comments
 (0)