Skip to content

ENH: Format datetime.datetime and pd.Timestamp objects in pd.to_datetime #49338

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Other enhancements
- Fix ``test`` optional_extra by adding missing test package ``pytest-asyncio`` (:issue:`48361`)
- :func:`DataFrame.astype` exception message thrown improved to include column name when type conversion is not possible. (:issue:`47571`)
- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. The default is 'w'; 'a' can be used when ``lines=True`` and ``orient='records'`` to append record-oriented JSON lines to an existing JSON file. (:issue:`35849`)
- :func:`to_datetime` now correctly parses ``datetime.datetime`` objects in the input when the ``format`` argument is given, instead of raising a ``ValueError``. (:issue:`49298`)

.. ---------------------------------------------------------------------------
.. _whatsnew_200.notable_bug_fixes:
Expand Down
47 changes: 42 additions & 5 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
from cpython.datetime cimport (
date,
datetime,
tzinfo,
)

Expand Down Expand Up @@ -129,12 +130,22 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai
if val in nat_strings:
iresult[i] = NPY_NAT
continue
elif checknull_with_nat_and_na(val):
iresult[i] = NPY_NAT
continue
elif isinstance(val, datetime):
iresult[i] = _parse_python_datetime_object(val, &dts)
try:
check_dts_bounds(&dts)
except ValueError:
if is_coerce:
iresult[i] = NPY_NAT
continue
raise
result_timezone[i] = val.tzname()
continue
else:
if checknull_with_nat_and_na(val):
iresult[i] = NPY_NAT
continue
else:
val = str(val)
val = str(val)

# exact matching
if exact:
Expand Down Expand Up @@ -532,3 +543,29 @@ cdef tzinfo parse_timezone_directive(str z):
(microseconds // 60_000_000))
total_minutes = -total_minutes if z.startswith("-") else total_minutes
return pytz.FixedOffset(total_minutes)

cdef int64_t _parse_python_datetime_object(datetime dt, npy_datetimestruct *dts):
    """
    Convert a ``datetime.datetime`` to an int64 through a numpy datetime struct.

    The date and time fields of ``dt`` are copied into ``dts`` unchanged
    (``dt.tzinfo`` is not read here), and the filled struct is converted with
    ``npy_datetimestruct_to_datetime`` using ``NPY_FR_ns``.

    Parameters
    ----------
    dt : datetime.datetime
        Object whose year, month, day, hour, minute, second and microsecond
        attributes are read.
    dts : npy_datetimestruct*
        Output struct. Its ``year``, ``month``, ``day``, ``hour``, ``min``,
        ``sec`` and ``us`` fields are overwritten and ``ps`` is set to 0.

    Returns
    -------
    int64_t
        The value returned by ``npy_datetimestruct_to_datetime(NPY_FR_ns, dts)``.
    """
    # Date components.
    dts.year = dt.year
    dts.month = dt.month
    dts.day = dt.day

    # Time-of-day components.
    dts.hour = dt.hour
    dts.min = dt.minute
    dts.sec = dt.second
    dts.us = dt.microsecond

    # datetime objects carry no sub-microsecond precision
    # (https://github.com/python/cpython/issues/59648).
    dts.ps = 0

    return npy_datetimestruct_to_datetime(NPY_FR_ns, dts)
50 changes: 50 additions & 0 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,56 @@ def test_to_datetime_dtarr(self, tz):
result = to_datetime(arr)
assert result is arr

@pytest.mark.parametrize(
    "init_constructor, end_constructor",
    [
        (Index, DatetimeIndex),
        (list, DatetimeIndex),
        (np.array, DatetimeIndex),
        (Series, Series),
    ],
)
def test_to_datetime_arraylike_contains_pydatetime(
    self, init_constructor, end_constructor
):
    """
    Check to_datetime with ``format`` on input mixing a string and a datetime.

    The input container is built with ``init_constructor`` and the result is
    compared against the expected timestamps wrapped in ``end_constructor``.
    """
    # GH 49298
    mixed_values = init_constructor(
        ["01/02/01 12:00", datetime(2001, 2, 2, 12, 30)]
    )

    result = to_datetime(mixed_values, format="%d/%m/%y %H:%M")

    expected = end_constructor(
        [
            Timestamp("2001-02-01 12:00:00"),
            Timestamp("2001-02-02 12:30:00"),
        ]
    )
    tm.assert_equal(result, expected)

@pytest.mark.parametrize(
    "init_constructor, end_constructor",
    [
        (Index, DatetimeIndex),
        (list, DatetimeIndex),
        (np.array, DatetimeIndex),
        (Series, Series),
    ],
)
def test_to_datetime_arraylike_contains_pydatetime_utc(
    self, cache, init_constructor, end_constructor
):
    """
    Check to_datetime with ``format`` and ``utc=True`` on mixed input.

    The input holds two strings and one UTC-aware ``datetime``; the result is
    compared against three UTC timestamps wrapped in ``end_constructor``.
    """
    # GH 49298
    aware_dt = datetime(2010, 1, 2, 12, 13, 16).replace(tzinfo=timezone.utc)
    mixed_values = init_constructor(
        ["20100102 121314", "20100102 121315", aware_dt]
    )

    result = to_datetime(
        mixed_values, format="%Y%m%d %H%M%S", utc=True, cache=cache
    )

    expected = end_constructor(
        [
            Timestamp("2010-01-02 12:13:14", tz="utc"),
            Timestamp("2010-01-02 12:13:15", tz="utc"),
            Timestamp("2010-01-02 12:13:16", tz="utc"),
        ]
    )
    tm.assert_equal(result, expected)

def test_to_datetime_pydatetime(self):
actual = to_datetime(datetime(2008, 1, 15))
assert actual == datetime(2008, 1, 15)
Expand Down