Skip to content

ENH: Format datetime.datetime and pd.Timestamp objects in pd.to_datetime #49338

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Other enhancements
- Fix ``test`` optional_extra by adding missing test package ``pytest-asyncio`` (:issue:`48361`)
- :func:`DataFrame.astype` exception message thrown improved to include column name when type conversion is not possible. (:issue:`47571`)
- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`)
- :func:`to_datetime` now accepts ``datetime.datetime`` and :class:`Timestamp` objects when the ``format`` argument is passed (they are kept as-is rather than parsed with ``format``), instead of raising a ``ValueError``. (:issue:`49298`)

.. ---------------------------------------------------------------------------
.. _whatsnew_200.notable_bug_fixes:
Expand Down
28 changes: 23 additions & 5 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
"""Strptime-related classes and functions.
"""
from cpython.datetime cimport (
PyDateTime_Check,
date,
import_datetime,
tzinfo,
)

import_datetime()

from _thread import allocate_lock as _thread_allocate_lock

import numpy as np
Expand All @@ -25,7 +29,9 @@ from pandas._libs.tslibs.np_datetime cimport (
check_dts_bounds,
npy_datetimestruct,
npy_datetimestruct_to_datetime,
pydatetime_to_dt64,
)
from pandas._libs.tslibs.timestamps cimport _Timestamp


cdef dict _parse_code_table = {'y': 0,
Expand Down Expand Up @@ -122,19 +128,31 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai
result_timezone = np.empty(n, dtype='object')

dts.us = dts.ps = dts.as = 0
expect_tz_aware = "%z" in fmt or "%Z" in fmt

for i in range(n):
val = values[i]
if isinstance(val, str):
if val in nat_strings:
iresult[i] = NPY_NAT
continue
else:
if checknull_with_nat_and_na(val):
iresult[i] = NPY_NAT
continue
elif checknull_with_nat_and_na(val):
iresult[i] = NPY_NAT
continue
elif PyDateTime_Check(val):
if isinstance(val, _Timestamp):
iresult[i] = val.tz_localize(None)._as_unit("ns").value
else:
val = str(val)
iresult[i] = pydatetime_to_dt64(val, &dts)
check_dts_bounds(&dts)
if val.tzinfo is None and expect_tz_aware:
raise ValueError("Cannot mix tz-aware with tz-naive values")
elif val.tzinfo is not None and not expect_tz_aware:
Comment on lines +148 to +150
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does it work to simplify this into

Suggested change
if val.tzinfo is None and expect_tz_aware:
raise ValueError("Cannot mix tz-aware with tz-naive values")
elif val.tzinfo is not None and not expect_tz_aware:
if (val.tzinfo is not None) ^ expect_tz_aware:

?

raise ValueError("Cannot mix tz-aware with tz-naive values")
result_timezone[i] = val.tzinfo
continue
else:
val = str(val)

# exact matching
if exact:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ def _array_strptime_with_fallback(
# Indicates to the caller to fallback to objects_to_datetime64ns
return None
else:
if "%Z" in fmt or "%z" in fmt:
if any(timezones):
return _return_parsed_timezone_results(result, timezones, tz, name)

return _box_as_indexlike(result, utc=utc, name=name)
Expand Down
92 changes: 92 additions & 0 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,98 @@ def test_to_datetime_dtarr(self, tz):
result = to_datetime(arr)
assert result is arr

def test_to_datetime_arraylike_contains_pydatetime_and_timestamp(self):
    """Mixed Timestamp/datetime/str input converts to one DatetimeIndex.

    GH 49298
    """
    # Explicit custom format: the expected index carries the Timestamp and
    # datetime entries over with their full time-of-day intact.
    custom_format_input = [
        Timestamp("2001-10-01 12:00:01.123456789"),
        datetime(2001, 10, 2, 12, 30, 1, 123456),
        "10/03/01",
    ]
    custom_format_result = to_datetime(custom_format_input, format="%m/%d/%y")
    custom_format_expected = DatetimeIndex(
        [
            Timestamp("2001-10-01 12:00:01.123456789"),
            Timestamp("2001-10-02 12:30:01.123456"),
            Timestamp("2001-10-03 00:00:00"),
        ]
    )
    tm.assert_equal(custom_format_result, custom_format_expected)

    # ISO8601 strings, no explicit ``format`` argument.
    iso_input = [
        Timestamp("2001-10-01 13:18:05"),
        datetime(2001, 10, 2, 13, 18, 5),
        "2001-10-03T13:18:05",
        "20011004",
    ]
    iso_result = to_datetime(iso_input)
    iso_expected = DatetimeIndex(
        [
            Timestamp("2001-10-01 13:18:05"),
            Timestamp("2001-10-02 13:18:05"),
            Timestamp("2001-10-03 13:18:05"),
            Timestamp("2001-10-04 00:00:00"),
        ]
    )
    tm.assert_equal(iso_result, iso_expected)

def test_to_datetime_arraylike_contains_pydatetime_and_timestamp_with_tz(self):
    """tz-aware datetime/Timestamp entries mix with ``%z`` strings (GH 49298)."""
    berlin = pytz.timezone("Europe/Berlin")
    eastern = pytz.timezone("US/Eastern")
    mixed_offsets = [
        "20100102 121314 +01:00",
        "20100102 121315 -05:00",
        berlin.localize(datetime(2010, 1, 2, 12, 13, 16)),
        eastern.localize(Timestamp("2010-01-02 12:13:17")),
    ]
    fmt = "%Y%m%d %H%M%S %z"

    # Different offsets with utc=True: everything is expected in UTC.
    expected_utc = DatetimeIndex(
        [
            Timestamp("2010-01-02 11:13:14", tz="utc"),
            Timestamp("2010-01-02 17:13:15", tz="utc"),
            Timestamp("2010-01-02 11:13:16", tz="utc"),
            Timestamp("2010-01-02 17:13:17", tz="utc"),
        ]
    )
    utc_result = to_datetime(mixed_offsets, format=fmt, utc=True)
    tm.assert_equal(utc_result, expected_utc)

    # Different offsets with utc=False: each element keeps its own offset,
    # so the expected container is a plain Index of Timestamps.
    expected_local = Index(
        [
            Timestamp("2010-01-02 12:13:14 +01:00"),
            Timestamp("2010-01-02 12:13:15 -05:00"),
            Timestamp("2010-01-02 12:13:16 +01:00"),
            Timestamp("2010-01-02 12:13:17 -05:00"),
        ]
    )
    local_result = to_datetime(mixed_offsets, format=fmt, utc=False)
    tm.assert_equal(local_result, expected_local)

@pytest.mark.parametrize(
    "value",
    [datetime(2010, 1, 2, 12, 13, 16), Timestamp("2010-01-02 12:13:17")],
)
def test_to_datetime_includes_tz_dtype_on_pydatetime_and_timestamp(self, value):
    """Passing ``format`` gives the same result as omitting it (GH 49298)."""
    # No timezone: results with and without ``format`` must match.
    naive_input = [value]
    naive_without_format = to_datetime(naive_input)
    naive_with_format = to_datetime(naive_input, format="%m-%d-%Y")
    tm.assert_equal(naive_without_format, naive_with_format)

    # Localized value: the result dtype must carry the timezone, and the
    # two call styles must still agree.
    santiago = pytz.timezone("America/Santiago")
    aware_input = [santiago.localize(value)]
    aware_without_format = to_datetime(aware_input)
    aware_with_format = to_datetime(aware_input, format="%m-%d-%Y %z")
    tm.assert_equal(aware_with_format.dtype.tz, santiago)
    tm.assert_equal(aware_without_format, aware_with_format)

@pytest.mark.parametrize(
    "value",
    [datetime(2010, 1, 2, 12, 13, 16), Timestamp("2010-01-02 12:13:17")],
)
def test_to_datetime_mixing_naive_tzaware_raises(self, value):
    """tz-awareness of the values must agree with the format (GH 49298)."""
    expected_msg = "Cannot mix tz-aware with tz-naive values"
    aware_value = pytz.timezone("America/Santiago").localize(value)

    failing_cases = [
        # Format expects a tz but the input is not localized.
        ([value], "%m-%d-%Y %z"),
        # Format does not expect a tz but the input is localized.
        ([aware_value], "%m-%d-%Y"),
        # Mixed input should fail with either kind of format.
        ([value, aware_value], "%m-%d-%Y %z"),
        ([value, aware_value], "%m-%d-%Y"),
    ]
    for data, fmt in failing_cases:
        with pytest.raises(ValueError, match=expected_msg):
            to_datetime(data, format=fmt)

def test_to_datetime_pydatetime(self):
actual = to_datetime(datetime(2008, 1, 15))
assert actual == datetime(2008, 1, 15)
Expand Down