Skip to content

ENH: Format datetime.datetime and pd.Timestamp objects in pd.to_datetime #49338

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Other enhancements
- Fix ``test`` optional_extra by adding missing test package ``pytest-asyncio`` (:issue:`48361`)
- :func:`DataFrame.astype` exception message thrown improved to include column name when type conversion is not possible. (:issue:`47571`)
- :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`)
- :func:`to_datetime` now accepts ``datetime.datetime`` and :class:`Timestamp` objects when ``format`` is given, applying the format to them instead of raising a ``ValueError`` (:issue:`49298`)

.. ---------------------------------------------------------------------------
.. _whatsnew_200.notable_bug_fixes:
Expand Down
12 changes: 7 additions & 5 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
from cpython.datetime cimport (
date,
datetime,
tzinfo,
)

Expand Down Expand Up @@ -129,12 +130,13 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai
if val in nat_strings:
iresult[i] = NPY_NAT
continue
elif checknull_with_nat_and_na(val):
iresult[i] = NPY_NAT
continue
elif isinstance(val, datetime):
val = val.strftime(fmt)
else:
if checknull_with_nat_and_na(val):
iresult[i] = NPY_NAT
continue
else:
val = str(val)
val = str(val)

# exact matching
if exact:
Expand Down
115 changes: 115 additions & 0 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,121 @@ def test_to_datetime_dtarr(self, tz):
result = to_datetime(arr)
assert result is arr

@pytest.mark.parametrize(
    "data, expected",
    [
        (
            Timestamp("2001-10-01 12:00:01.123456789"),
            Timestamp("2001-10-01 12:00:01.123456"),
        ),
        (
            datetime(2002, 10, 1, 12, 0, 1, 123456),
            Timestamp("2002-10-01 12:00:01.123456"),
        ),
        ("10/01/03 12:00:01.123456789", Timestamp("2003-10-01 12:00:01.123456789")),
    ],
)
def test_to_datetime_preserves_resolution_when_possible(self, data, expected):
    """
    Check the resolution of ``to_datetime`` results with and without ``format``.

    For non-string inputs, parsing without ``format`` must return the value
    unchanged. With a ``%f`` format, the parametrized expectations are
    microsecond precision for the ``Timestamp``/``datetime`` inputs and the
    full nanosecond value for the string input.
    """
    # GH 49298
    fmt = "%m/%d/%y %H:%M:%S.%f"

    if not isinstance(data, str):
        # No format given: the datetime-like value must round-trip as-is.
        tm.assert_equal(to_datetime([data]), DatetimeIndex([data]))

    tm.assert_equal(to_datetime([data], format=fmt), DatetimeIndex([expected]))

@pytest.mark.parametrize(
    "init_constructor, end_constructor",
    [
        (Index, DatetimeIndex),
        (list, DatetimeIndex),
        (np.array, DatetimeIndex),
        (Series, Series),
    ],
)
def test_to_datetime_arraylike_contains_pydatetime_and_timestamp(
    self, init_constructor, end_constructor
):
    """
    Parse containers mixing ``Timestamp``, ``datetime`` and ``str`` with a format.

    Each scenario is ``(format, input values, expected values)``. The inputs
    are wrapped with ``init_constructor`` before parsing and the expected
    values with ``end_constructor`` before comparison.
    """
    # GH 49298
    scenarios = [
        # Date-only format: the Timestamp/datetime inputs carry a time of day
        # that the string does not; expected values are midnight for all three.
        (
            "%m/%d/%y",
            [
                Timestamp("2001-10-01 12:00:01.123456789"),
                datetime(2001, 10, 2, 12, 30, 1, 123456),
                "10/03/01",
            ],
            [
                Timestamp("2001-10-01"),
                Timestamp("2001-10-02"),
                Timestamp("2001-10-03"),
            ],
        ),
        # Fractional-second format: the Timestamp and the string both carry
        # nanoseconds; only the string is expected to keep them.
        (
            "%m/%d/%y %H:%M:%S.%f",
            [
                Timestamp("2001-10-01 12:00:01.123456789"),
                datetime(2001, 10, 2, 12, 30, 1, 123456),
                "10/03/01 13:00:01.123456789",
            ],
            [
                Timestamp("2001-10-01 12:00:01.123456"),
                Timestamp("2001-10-02 12:30:01.123456"),
                Timestamp("2001-10-03 13:00:01.123456789"),
            ],
        ),
        # Second-resolution format: the Timestamp/datetime inputs are plain
        # dates, so they carry less information than the string.
        (
            "%m/%d/%y %H:%M:%S",
            [
                Timestamp("2001-10-01"),
                datetime(2001, 10, 2),
                "10/03/01 12:00:01",
            ],
            [
                Timestamp("2001-10-01 00:00:00"),
                Timestamp("2001-10-02 00:00:00"),
                Timestamp("2001-10-03 12:00:01"),
            ],
        ),
    ]

    for fmt, values, expected_values in scenarios:
        result = to_datetime(init_constructor(values), format=fmt)
        tm.assert_equal(result, end_constructor(expected_values))

@pytest.mark.parametrize(
    "init_constructor, end_constructor",
    [
        (Index, DatetimeIndex),
        (list, DatetimeIndex),
        (np.array, DatetimeIndex),
        (Series, Series),
    ],
)
def test_to_datetime_arraylike_contains_pydatetime_and_timestamp_utc(
    self, cache, init_constructor, end_constructor
):
    """
    Parse a str / tz-aware Timestamp / tz-aware datetime mix with ``utc=True``.

    The three inputs are one second apart, and the expected output is the
    matching UTC ``Timestamp`` for each, wrapped with ``end_constructor``.
    """
    # GH 49298
    aware_dt = datetime(2010, 1, 2, 12, 13, 16, tzinfo=timezone.utc)
    data = [
        "20100102 121314",
        Timestamp("2010-01-02 12:13:15", tz="utc"),
        aware_dt,
    ]
    expected = end_constructor(
        [
            Timestamp("2010-01-02 12:13:14", tz="utc"),
            Timestamp("2010-01-02 12:13:15", tz="utc"),
            Timestamp("2010-01-02 12:13:16", tz="utc"),
        ]
    )

    # The Series input is built with an explicit tz-aware dtype; the other
    # constructors receive the raw mixed list.
    input_data = (
        init_constructor(data, dtype="datetime64[ns, UTC]")
        if init_constructor is Series
        else init_constructor(data)
    )

    result = to_datetime(input_data, format="%Y%m%d %H%M%S", utc=True, cache=cache)
    tm.assert_equal(result, expected)

def test_to_datetime_pydatetime(self):
actual = to_datetime(datetime(2008, 1, 15))
assert actual == datetime(2008, 1, 15)
Expand Down