From 80a4f36e505e72e5d623d3cf91a99655294d0812 Mon Sep 17 00:00:00 2001 From: Compro Prasad Date: Fri, 21 Jan 2022 21:50:48 +0530 Subject: [PATCH] Fixes #45506 Catch overflow error when converting to datetime An exception is raised when a part of the date, such as the day, month, or year, is greater than the maximum 32-bit signed integer. Added tests for this issue in pandas/tests/series/methods/test_compare.py Added whatsnew entry for v1.5.0 --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/arrays/datetimes.py | 8 ++++++- pandas/tests/series/methods/test_compare.py | 25 +++++++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 919ed926f8195..41b95532f1511 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -247,7 +247,7 @@ Conversion - Bug in :meth:`Float64Index.astype` to unsigned integer dtype incorrectly casting to ``np.int64`` dtype (:issue:`45309`) - Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`) - Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`) -- +- Bug when comparing string and datetime64ns objects causing ``OverflowError`` exception. 
(:issue:`45506`) Strings ^^^^^^^ diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 9c262fa37d760..8ac1f54059154 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -37,7 +37,10 @@ tzconversion, ) from pandas._typing import npt -from pandas.errors import PerformanceWarning +from pandas.errors import ( + OutOfBoundsDatetime, + PerformanceWarning, +) from pandas.util._exceptions import find_stack_level from pandas.util._validators import validate_inclusive @@ -2215,6 +2218,9 @@ def objects_to_datetime64ns( return values.view("i8"), tz_parsed except (ValueError, TypeError): raise err + except OverflowError as err: + # Exception is raised when a part of date is greater than 32 bit signed int + raise OutOfBoundsDatetime("Out of bounds nanosecond timestamp") from err if tz_parsed is not None: # We can take a shortcut since the datetime64 numpy array diff --git a/pandas/tests/series/methods/test_compare.py b/pandas/tests/series/methods/test_compare.py index 8570800048898..fe2016a245ec7 100644 --- a/pandas/tests/series/methods/test_compare.py +++ b/pandas/tests/series/methods/test_compare.py @@ -114,3 +114,28 @@ def test_compare_unaligned_objects(): ser1 = pd.Series([1, 2, 3]) ser2 = pd.Series([1, 2, 3, 4]) ser1.compare(ser2) + + +def test_compare_datetime64_and_string(): + # Issue https://github.com/pandas-dev/pandas/issues/45506 + # Catch OverflowError when comparing datetime64 and string + data = [ + {"a": "2015-07-01", "b": "08335394550"}, + {"a": "2015-07-02", "b": "+49 (0) 0345 300033"}, + {"a": "2015-07-03", "b": "+49(0)2598 04457"}, + {"a": "2015-07-04", "b": "0741470003"}, + {"a": "2015-07-05", "b": "04181 83668"}, + ] + dtypes = {"a": "datetime64[ns]", "b": "string"} + df = pd.DataFrame(data=data).astype(dtypes) + + result_eq1 = df["a"].eq(df["b"]) + result_eq2 = df["a"] == df["b"] + result_neq = df["a"] != df["b"] + + expected_eq = pd.Series([False] * 5) # For .eq and == + expected_neq = 
pd.Series([True] * 5) # For != + + tm.assert_series_equal(result_eq1, expected_eq) + tm.assert_series_equal(result_eq2, expected_eq) + tm.assert_series_equal(result_neq, expected_neq)