Skip to content

REF: merge datetime_to_datetime64 into array_to_datetime #47018

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ from pandas._libs.tslibs.nattype cimport (
c_nat_strings as nat_strings,
)
from pandas._libs.tslibs.timestamps cimport _Timestamp
from pandas._libs.tslibs.timezones cimport tz_compare

from pandas._libs.tslibs import (
Resolution,
Expand Down Expand Up @@ -447,6 +448,7 @@ cpdef array_to_datetime(
bint string_to_dts_failed
datetime py_dt
tzinfo tz_out = None
bint found_tz = False, found_naive = False

# specify error conditions
assert is_raise or is_ignore or is_coerce
Expand All @@ -465,18 +467,34 @@ cpdef array_to_datetime(
elif PyDateTime_Check(val):
seen_datetime = True
if val.tzinfo is not None:
found_tz = True
if utc_convert:
_ts = convert_datetime_to_tsobject(val, None)
iresult[i] = _ts.value
else:
elif found_naive:
raise ValueError('Tz-aware datetime.datetime '
'cannot be converted to '
'datetime64 unless utc=True')
elif isinstance(val, _Timestamp):
iresult[i] = val.value
elif tz_out is not None and not tz_compare(tz_out, val.tzinfo):
raise ValueError('Tz-aware datetime.datetime '
'cannot be converted to '
'datetime64 unless utc=True')
else:
found_tz = True
tz_out = val.tzinfo
_ts = convert_datetime_to_tsobject(val, None)
iresult[i] = _ts.value

else:
iresult[i] = pydatetime_to_dt64(val, &dts)
check_dts_bounds(&dts)
found_naive = True
if found_tz:
raise ValueError('Cannot mix tz-aware with '
'tz-naive values')
if isinstance(val, _Timestamp):
iresult[i] = val.value
else:
iresult[i] = pydatetime_to_dt64(val, &dts)
check_dts_bounds(&dts)

elif PyDate_Check(val):
seen_datetime = True
Expand Down
3 changes: 0 additions & 3 deletions pandas/_libs/tslibs/conversion.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,4 @@ def ensure_timedelta64ns(
arr: np.ndarray, # np.ndarray[timedelta64[ANY]]
copy: bool = ...,
) -> np.ndarray: ... # np.ndarray[timedelta64ns]
# Type stub: accepts an object-dtype ndarray and returns a 2-tuple whose
# first element is an ndarray and whose second is a tzinfo or None.
def datetime_to_datetime64(
values: npt.NDArray[np.object_],
) -> tuple[np.ndarray, tzinfo | None]: ... # (np.ndarray[dt64ns], _)
# Type stub: accepts a datetime plus a tzinfo (or None) and returns a datetime.
def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ...
74 changes: 0 additions & 74 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -264,80 +264,6 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool = True):
return dt64_result.view(TD64NS_DTYPE)


# ----------------------------------------------------------------------


@cython.boundscheck(False)
@cython.wraparound(False)
def datetime_to_datetime64(ndarray values):
    # The argument is really ndarray[object]; it cannot be declared as such
    # without also fixing ndim.
    """
    Convert an ndarray of datetime-like objects into a datetime64[ns] array.

    Every element must be null/NaT-like or a ``datetime`` instance.  All
    non-null elements must be either tz-naive, or tz-aware with one common
    time zone.

    Parameters
    ----------
    values : ndarray[object]

    Returns
    -------
    result : ndarray[datetime64ns]
        Same shape as ``values``.
    inferred_tz : tzinfo or None
        The time zone shared by the tz-aware elements, or None if no
        tz-aware element was seen.

    Raises
    ------
    ValueError
        If tz-aware and tz-naive datetimes are mixed, or if tz-aware
        datetimes carry differing time zones.
    TypeError
        If an element is neither null nor a ``datetime``.
    """
    cdef:
        Py_ssize_t i, n = values.size
        object val
        int64_t stamp
        ndarray i8view  # int64_t, but ndim would have to be specified
        npy_datetimestruct dts
        _TSObject tsobj
        bint saw_naive = False
        tzinfo inferred_tz = None

        cnp.broadcast it

    result = np.empty((<object>values).shape, dtype='M8[ns]')
    i8view = result.view('i8')

    # Walk output (operand 0) and input (operand 1) in lockstep.
    it = cnp.PyArray_MultiIterNew2(i8view, values)
    for i in range(n):
        # Equivalent of: val = values[i]
        val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(it, 1))[0]

        if checknull_with_nat(val):
            stamp = NPY_NAT
        elif not PyDateTime_Check(val):
            raise TypeError(f'Unrecognized value type: {type(val)}')
        elif val.tzinfo is None:
            # tz-naive datetime: only allowed if no tz-aware one came before
            saw_naive = True
            if inferred_tz is not None:
                raise ValueError('Cannot mix tz-aware with '
                                 'tz-naive values')
            stamp = pydatetime_to_dt64(val, &dts)
            check_dts_bounds(&dts)
        else:
            # tz-aware datetime: only allowed if no tz-naive one came before
            if saw_naive:
                raise ValueError('Cannot mix tz-aware with '
                                 'tz-naive values')
            if inferred_tz is None:
                # first tz-aware element fixes the time zone for the array
                inferred_tz = val.tzinfo
            elif not tz_compare(val.tzinfo, inferred_tz):
                raise ValueError('Array must be all same time zone')

            tsobj = convert_datetime_to_tsobject(val, None)
            stamp = tsobj.value
            check_dts_bounds(&tsobj.dts)

        # Equivalent of: i8view[i] = stamp
        (<int64_t*>cnp.PyArray_MultiIter_DATA(it, 0))[0] = stamp

        cnp.PyArray_MultiIter_NEXT(it)

    return result, inferred_tz


# ----------------------------------------------------------------------
# _TSObject Conversion

Expand Down
8 changes: 0 additions & 8 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2263,14 +2263,6 @@ def objects_to_datetime64ns(
allow_mixed=allow_mixed,
)
result = result.reshape(data.shape, order=order)
except ValueError as err:
try:
values, tz_parsed = conversion.datetime_to_datetime64(data)
# If tzaware, these values represent unix timestamps, so we
# return them as i8 to distinguish from wall times
return values.view("i8"), tz_parsed
except (ValueError, TypeError):
raise err
except OverflowError as err:
# Exception is raised when a part of date is greater than 32 bit signed int
raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err
Expand Down
72 changes: 26 additions & 46 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
OutOfBoundsDatetime,
Timedelta,
Timestamp,
conversion,
iNaT,
nat_strings,
parsing,
Expand All @@ -41,6 +40,7 @@
ArrayLike,
DateTimeErrorChoices,
Timezone,
npt,
)
from pandas.util._exceptions import find_stack_level

Expand Down Expand Up @@ -467,8 +467,6 @@ def _array_strptime_with_fallback(

try:
result, timezones = array_strptime(arg, fmt, exact=exact, errors=errors)
if "%Z" in fmt or "%z" in fmt:
return _return_parsed_timezone_results(result, timezones, tz, name)
except OutOfBoundsDatetime:
if errors == "raise":
raise
Expand All @@ -494,6 +492,9 @@ def _array_strptime_with_fallback(
else:
# Indicates to the caller to fallback to objects_to_datetime64ns
return None
else:
if "%Z" in fmt or "%z" in fmt:
return _return_parsed_timezone_results(result, timezones, tz, name)

return _box_as_indexlike(result, utc=utc, name=name)

Expand All @@ -512,38 +513,28 @@ def _to_datetime_with_format(
Try parsing with the given format, returning None on failure.
"""
result = None
try:
# shortcut formatting here
if fmt == "%Y%m%d":
# pass orig_arg as float-dtype may have been converted to
# datetime64[ns]
orig_arg = ensure_object(orig_arg)
try:
# may return None without raising
result = _attempt_YYYYMMDD(orig_arg, errors=errors)
except (ValueError, TypeError, OutOfBoundsDatetime) as err:
raise ValueError(
"cannot convert the input to '%Y%m%d' date format"
) from err
if result is not None:
utc = tz == "utc"
return _box_as_indexlike(result, utc=utc, name=name)

# fallback
res = _array_strptime_with_fallback(
arg, name, tz, fmt, exact, errors, infer_datetime_format
)
return res

except ValueError as err:
# Fallback to try to convert datetime objects if timezone-aware
# datetime objects are found without passing `utc=True`
# shortcut formatting here
if fmt == "%Y%m%d":
# pass orig_arg as float-dtype may have been converted to
# datetime64[ns]
orig_arg = ensure_object(orig_arg)
try:
values, tz = conversion.datetime_to_datetime64(arg)
dta = DatetimeArray(values, dtype=tz_to_dtype(tz))
return DatetimeIndex._simple_new(dta, name=name)
except (ValueError, TypeError):
raise err
# may return None without raising
result = _attempt_YYYYMMDD(orig_arg, errors=errors)
except (ValueError, TypeError, OutOfBoundsDatetime) as err:
raise ValueError(
"cannot convert the input to '%Y%m%d' date format"
) from err
if result is not None:
utc = tz == "utc"
return _box_as_indexlike(result, utc=utc, name=name)

# fallback
res = _array_strptime_with_fallback(
arg, name, tz, fmt, exact, errors, infer_datetime_format
)
return res


def _to_datetime_with_unit(arg, unit, name, tz, errors: str) -> Index:
Expand Down Expand Up @@ -1007,17 +998,6 @@ def to_datetime(
DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'],
dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)
- Finally, mixing timezone-aware strings and :class:`datetime.datetime` always
raises an error, even if the elements all have the same time offset.
>>> from datetime import datetime, timezone, timedelta
>>> d = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1)))
>>> pd.to_datetime(["2020-01-01 17:00 -0100", d])
Traceback (most recent call last):
...
ValueError: Tz-aware datetime.datetime cannot be converted to datetime64
unless utc=True
|
Setting ``utc=True`` solves most of the above issues:
Expand Down Expand Up @@ -1243,7 +1223,7 @@ def coerce(values):
return values


def _attempt_YYYYMMDD(arg: np.ndarray, errors: str) -> np.ndarray | None:
def _attempt_YYYYMMDD(arg: npt.NDArray[np.object_], errors: str) -> np.ndarray | None:
"""
try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
arg is passed in as an object dtype, but could really be ints/strings
Expand All @@ -1257,7 +1237,7 @@ def _attempt_YYYYMMDD(arg: np.ndarray, errors: str) -> np.ndarray | None:

def calc(carg):
# calculate the actual result
carg = carg.astype(object)
carg = carg.astype(object, copy=False)
parsed = parsing.try_parse_year_month_day(
carg / 10000, carg / 100 % 100, carg % 100
)
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import (
datetime,
timedelta,
timezone,
)
from decimal import Decimal
import locale
Expand Down Expand Up @@ -455,6 +456,14 @@ def test_to_datetime_parse_timezone_keeps_name(self):


class TestToDatetime:
def test_to_datetime_mixed_datetime_and_string(self):
    """
    A tz-aware string mixed with a tz-aware ``datetime`` of the same UTC
    offset parses to the same index as the equivalent pair of datetimes.
    """
    # GH#47018 adapted old doctest with new behavior
    offset = -timedelta(hours=1)
    aware_17h = datetime(2020, 1, 1, 17, tzinfo=timezone(offset))
    aware_18h = datetime(2020, 1, 1, 18, tzinfo=timezone(offset))

    parsed = to_datetime(["2020-01-01 17:00 -0100", aware_18h])

    from_datetimes = to_datetime([aware_17h, aware_18h])
    expected = from_datetimes.tz_convert(pytz.FixedOffset(-60))
    tm.assert_index_equal(parsed, expected)

def test_to_datetime_np_str(self):
# GH#32264
value = np.str_("2019-02-04 10:18:46.297000+0000")
Expand Down