diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 4c85b3d5dc745..337d1df20488a 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -49,6 +49,7 @@ Other enhancements - Fix ``test`` optional_extra by adding missing test package ``pytest-asyncio`` (:issue:`48361`) - :func:`DataFrame.astype` exception message thrown improved to include column name when type conversion is not possible. (:issue:`47571`) - :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`) +- :func:`to_datetime` now skips ``datetime.datetime`` and :class:`Timestamp` objects when the ``format`` argument is passed, instead of raising a ``ValueError``. (:issue:`49298`) .. --------------------------------------------------------------------------- .. _whatsnew_200.notable_bug_fixes: diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 6287c2fbc5d34..7de1a5ab15b7c 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -1,10 +1,14 @@ """Strptime-related classes and functions. 
""" from cpython.datetime cimport ( + PyDateTime_Check, date, + import_datetime, tzinfo, ) +import_datetime() + from _thread import allocate_lock as _thread_allocate_lock import numpy as np @@ -25,7 +29,9 @@ from pandas._libs.tslibs.np_datetime cimport ( check_dts_bounds, npy_datetimestruct, npy_datetimestruct_to_datetime, + pydatetime_to_dt64, ) +from pandas._libs.tslibs.timestamps cimport _Timestamp cdef dict _parse_code_table = {'y': 0, @@ -122,6 +128,7 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai result_timezone = np.empty(n, dtype='object') dts.us = dts.ps = dts.as = 0 + expect_tz_aware = "%z" in fmt or "%Z" in fmt for i in range(n): val = values[i] @@ -129,12 +136,23 @@ def array_strptime(ndarray[object] values, str fmt, bint exact=True, errors='rai if val in nat_strings: iresult[i] = NPY_NAT continue - else: - if checknull_with_nat_and_na(val): - iresult[i] = NPY_NAT - continue + elif checknull_with_nat_and_na(val): + iresult[i] = NPY_NAT + continue + elif PyDateTime_Check(val): + if isinstance(val, _Timestamp): + iresult[i] = val.tz_localize(None)._as_unit("ns").value else: - val = str(val) + iresult[i] = pydatetime_to_dt64(val, &dts) + check_dts_bounds(&dts) + if val.tzinfo is None and expect_tz_aware: + raise ValueError("Cannot mix tz-aware with tz-naive values") + elif val.tzinfo is not None and not expect_tz_aware: + raise ValueError("Cannot mix tz-aware with tz-naive values") + result_timezone[i] = val.tzinfo + continue + else: + val = str(val) # exact matching if exact: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 7791ea804a52a..84178f9203ec0 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -499,7 +499,7 @@ def _array_strptime_with_fallback( # Indicates to the caller to fallback to objects_to_datetime64ns return None else: - if "%Z" in fmt or "%z" in fmt: + if any(timezones): return _return_parsed_timezone_results(result, timezones, tz, 
name) return _box_as_indexlike(result, utc=utc, name=name) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index f524bc18793d8..6d4cf9dd240c8 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -609,6 +609,98 @@ def test_to_datetime_dtarr(self, tz): result = to_datetime(arr) assert result is arr + def test_to_datetime_arraylike_contains_pydatetime_and_timestamp(self): + # GH 49298 + # Test explicit custom format + case1 = [ + Timestamp("2001-10-01 12:00:01.123456789"), + datetime(2001, 10, 2, 12, 30, 1, 123456), + "10/03/01", + ] + result = to_datetime(case1, format="%m/%d/%y") + expected_data = [ + Timestamp("2001-10-01 12:00:01.123456789"), + Timestamp("2001-10-02 12:30:01.123456"), + Timestamp("2001-10-03 00:00:00"), + ] + tm.assert_equal(result, DatetimeIndex(expected_data)) + + # Test ISO8601 format + case2 = [ + Timestamp("2001-10-01 13:18:05"), + datetime(2001, 10, 2, 13, 18, 5), + "2001-10-03T13:18:05", + "20011004", + ] + result = to_datetime(case2) + expected_data = [ + Timestamp("2001-10-01 13:18:05"), + Timestamp("2001-10-02 13:18:05"), + Timestamp("2001-10-03 13:18:05"), + Timestamp("2001-10-04 00:00:00"), + ] + tm.assert_equal(result, DatetimeIndex(expected_data)) + + def test_to_datetime_arraylike_contains_pydatetime_and_timestamp_with_tz(self): + # GH 49298 + # Different offsets when utc=True + data = [ + "20100102 121314 +01:00", + "20100102 121315 -05:00", + pytz.timezone("Europe/Berlin").localize(datetime(2010, 1, 2, 12, 13, 16)), + pytz.timezone("US/Eastern").localize(Timestamp("2010-01-02 12:13:17")), + ] + expected_data = [ + Timestamp("2010-01-02 11:13:14", tz="utc"), + Timestamp("2010-01-02 17:13:15", tz="utc"), + Timestamp("2010-01-02 11:13:16", tz="utc"), + Timestamp("2010-01-02 17:13:17", tz="utc"), + ] + result = to_datetime(data, format="%Y%m%d %H%M%S %z", utc=True) + tm.assert_equal(result, DatetimeIndex(expected_data)) + + # Different offsets 
when utc=False + expected_data = [ + Timestamp("2010-01-02 12:13:14 +01:00"), + Timestamp("2010-01-02 12:13:15 -05:00"), + Timestamp("2010-01-02 12:13:16 +01:00"), + Timestamp("2010-01-02 12:13:17 -05:00"), + ] + result = to_datetime(data, format="%Y%m%d %H%M%S %z", utc=False) + tm.assert_equal(result, Index(expected_data)) + + @pytest.mark.parametrize("value", [datetime(2010, 1, 2, 12, 13, 16), Timestamp("2010-01-02 12:13:17")]) + def test_to_datetime_includes_tz_dtype_on_pydatetime_and_timestamp(self, value): + # GH 49298 + # No timezone + result_no_format = to_datetime([value]) + result_with_format = to_datetime([value], format="%m-%d-%Y") + tm.assert_equal(result_no_format, result_with_format) + + # Localized value + america_santiago = pytz.timezone("America/Santiago") + result_no_format = to_datetime([america_santiago.localize(value)]) + result_with_format = to_datetime([america_santiago.localize(value)], format="%m-%d-%Y %z") + tm.assert_equal(result_with_format.dtype.tz, america_santiago) + tm.assert_equal(result_no_format, result_with_format) + + @pytest.mark.parametrize("value", [datetime(2010, 1, 2, 12, 13, 16), Timestamp("2010-01-02 12:13:17")]) + def test_to_datetime_mixing_naive_tzaware_raises(self, value): + # GH 49298 + msg = "Cannot mix tz-aware with tz-naive values" + america_santiago = pytz.timezone("America/Santiago") + # Fail if format expects tz but input is not localized + with pytest.raises(ValueError, match=msg): + to_datetime([value], format="%m-%d-%Y %z") + # Fail if format does not expect tz but input is localized + with pytest.raises(ValueError, match=msg): + to_datetime([america_santiago.localize(value)], format="%m-%d-%Y") + # Mixed input should fail in both cases + with pytest.raises(ValueError, match=msg): + to_datetime([value, america_santiago.localize(value)], format="%m-%d-%Y %z") + with pytest.raises(ValueError, match=msg): + to_datetime([value, america_santiago.localize(value)], format="%m-%d-%Y") + def 
test_to_datetime_pydatetime(self): actual = to_datetime(datetime(2008, 1, 15)) assert actual == datetime(2008, 1, 15)