diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index 1f164d1aa98b4..7513ab6afa6f2 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -59,6 +59,7 @@ Exceptions and warnings errors.PyperclipException errors.PyperclipWindowsException errors.SpecificationError + errors.StrftimeErrorWarning errors.UndefinedVariableError errors.UnsortedIndexError errors.UnsupportedFunctionCall diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c77348b365370..3662c71432242 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -39,6 +39,7 @@ Other enhancements - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`) - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`) - :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`) +- :meth:`DatetimeIndex.strftime` and :meth:`DatetimeArray.strftime` now have an optional ``errors`` parameter, which can be set to ``'ignore'`` or ``'warn'`` to return ``None`` for datetimes that cannot be formatted instead of raising.
The latter also throws a warning (:issue:`58178`) - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`) - :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`) - :meth:`Series.cummin` and :meth:`Series.cummax` now supports :class:`CategoricalDtype` (:issue:`52335`) diff --git a/pandas/_libs/tslib.pyi b/pandas/_libs/tslib.pyi index 7e3372a80db9d..09b195c9267e3 100644 --- a/pandas/_libs/tslib.pyi +++ b/pandas/_libs/tslib.pyi @@ -10,6 +10,7 @@ def format_array_from_datetime( format: str | None = ..., na_rep: str | float = ..., reso: int = ..., # NPY_DATETIMEUNIT + errors: str | None = ..., ) -> npt.NDArray[np.object_]: ... def first_non_null(values: np.ndarray) -> int: ... def array_to_datetime( diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index dca3ba0ce49b3..3eca35b57b0ec 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -21,8 +21,12 @@ from numpy cimport ( ndarray, ) +import warnings + import numpy as np +from pandas.util._exceptions import find_stack_level + cnp.import_array() from pandas._libs.tslibs.dtypes cimport ( @@ -81,6 +85,8 @@ from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.missing cimport checknull_with_nat_and_na from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single +from pandas.errors import StrftimeErrorWarning + def _test_parse_iso8601(ts: str): """ @@ -115,6 +121,7 @@ def format_array_from_datetime( str format=None, na_rep: str | float = "NaT", NPY_DATETIMEUNIT reso=NPY_FR_ns, + str errors="raise", ) -> np.ndarray: """ return a np object array of the string formatted values @@ -128,6 +135,7 @@ def format_array_from_datetime( na_rep : optional, default is None a nat format reso : NPY_DATETIMEUNIT, default NPY_FR_ns + errors : str, "raise" (default), 
"ignore", or "warn" Returns ------- @@ -148,7 +156,6 @@ def format_array_from_datetime( ndarray result = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0) object[::1] res_flat = result.ravel() # should NOT be a copy cnp.flatiter it = cnp.PyArray_IterNew(values) - if tz is None: # if we don't have a format nor tz, then choose # a format based on precision @@ -201,21 +208,31 @@ def format_array_from_datetime( res += f".{dts.us // 1000:03d}" else: - ts = Timestamp._from_value_and_reso(val, reso=reso, tz=tz) if format is None: # Use datetime.str, that returns ts.isoformat(sep=' ') res = str(ts) - else: - - # invalid format string - # requires dates > 1900 + elif (errors == "warn"): + try: + res = ts.strftime(format) + except (ValueError, NotImplementedError): + # Catches errors and replaces result with None + mesg= "The following timestamp could not be converted to string:" +\ + f" [{ts}]. Set errors='raise' to see the details" + warnings.warn( + mesg, StrftimeErrorWarning, stacklevel=find_stack_level()) + + res = None + elif (errors == "ignore"): try: - # Note: dispatches to pydatetime res = ts.strftime(format) - except ValueError: - # Use datetime.str, that returns ts.isoformat(sep=' ') - res = str(ts) + except Exception: + res = None + + else: + # Do not catch errors, allow them to raise up through + # errors == 'raise' + res = ts.strftime(format) # Note: we can index result directly instead of using PyArray_MultiIter_DATA # like we do for the other functions because result is known C-contiguous diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ab17ae43215d2..c10c03056fdd6 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -334,7 +334,7 @@ def asi8(self) -> npt.NDArray[np.int64]: # Rendering Methods def _format_native_types( - self, *, na_rep: str | float = "NaT", date_format=None + self, *, na_rep: str | float = "NaT", date_format=None, errors="raise" ) -> 
npt.NDArray[np.object_]: """ Helper method for astype when converting to strings. @@ -1742,7 +1742,7 @@ class DatelikeOps(DatetimeLikeArrayMixin): URL="https://docs.python.org/3/library/datetime.html" "#strftime-and-strptime-behavior" ) - def strftime(self, date_format: str) -> npt.NDArray[np.object_]: + def strftime(self, date_format: str, errors="raise") -> npt.NDArray[np.object_]: """ Convert to Index using specified date_format. @@ -1763,6 +1763,12 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: ---------- date_format : str Date format string (e.g. "%%Y-%%m-%%d"). + errors : {'raise', 'ignore', 'warn'}, default 'raise' + + - 'raise' (default) does not catch underlying errors; they propagate. + - 'ignore' catches all errors and silently replaces the output with None. + - 'warn' has the same behaviour as 'ignore' and additionally issues a + StrftimeErrorWarning. Returns ------- @@ -1786,7 +1792,9 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: 'March 10, 2018, 09:00:02 AM'], dtype='object') """ - result = self._format_native_types(date_format=date_format, na_rep=np.nan) + result = self._format_native_types( + date_format=date_format, na_rep=np.nan, errors=errors + ) return result.astype(object, copy=False) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 0f59d62339bf2..de358607bde3e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -760,14 +760,19 @@ def astype(self, dtype, copy: bool = True): # Rendering Methods def _format_native_types( - self, *, na_rep: str | float = "NaT", date_format=None, **kwargs + self, *, na_rep: str | float = "NaT", date_format=None, errors="raise", **kwargs ) -> npt.NDArray[np.object_]: if date_format is None and self._is_dates_only: # Only dates and no timezone: provide a default format date_format = "%Y-%m-%d" return tslib.format_array_from_datetime( - self.asi8, tz=self.tz, format=date_format, na_rep=na_rep, reso=self._creso +
self.asi8, + tz=self.tz, + format=date_format, + na_rep=na_rep, + reso=self._creso, + errors=errors, ) # ----------------------------------------------------------------- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 951455b627fbd..21abc84b89fae 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -262,8 +262,8 @@ def _engine_type(self) -> type[libindex.DatetimeEngine]: # methods that dispatch to DatetimeArray and wrap result @doc(DatetimeArray.strftime) - def strftime(self, date_format) -> Index: - arr = self._data.strftime(date_format) + def strftime(self, date_format, errors="raise") -> Index: + arr = self._data.strftime(date_format, errors) return Index(arr, name=self.name, dtype=object) @doc(DatetimeArray.tz_convert) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index f01fe8ecef930..091482e05e90f 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -687,6 +687,21 @@ class CategoricalConversionWarning(Warning): """ +class StrftimeErrorWarning(Warning): + """ + Raised by ``Series.dt.strftime`` when formatting fails and ``errors='warn'``. + + See Also + -------- + Series.dt.strftime : Convert to Index using specified date_format. + + Examples + -------- + >>> dta = pd.DatetimeIndex(np.array(["-0020-01-01", "2020-01-02"], "datetime64[s]")) + >>> dta.strftime("%y", errors="warn") # doctest: +SKIP + """ + + class LossySetitemError(Exception): """ Raised when trying to do a __setitem__ on an np.ndarray that is not lossless.
@@ -749,6 +764,7 @@ class InvalidComparison(Exception): "PyperclipException", "PyperclipWindowsException", "SpecificationError", + "StrftimeErrorWarning", "UndefinedVariableError", "UnsortedIndexError", "UnsupportedFunctionCall", diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 3d8f8d791b763..2835a7be87e0f 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -13,6 +13,7 @@ ) from pandas._libs.tslibs import to_offset from pandas.compat.numpy import np_version_gt2 +from pandas.errors import StrftimeErrorWarning from pandas.core.dtypes.dtypes import PeriodDtype @@ -895,6 +896,68 @@ def test_strftime(self, arr1d): expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) tm.assert_numpy_array_equal(result, expected) + # additional tests for error parameter below + result = arr.strftime("%Y %b", "ignore") + expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = arr.strftime("%Y %b", "warn") + expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = arr.strftime("%Y %b", "raise") + expected = np.array([ts.strftime("%Y %b") for ts in arr], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + def test_strftime_err(self): + arr = DatetimeIndex(np.array(["1820-01-01", "2020-01-02"], "datetime64[s]")) + + windowFlag = False + try: + _ = arr[0].strftime("%y") + expected = pd.Index(["20", "20"], dtype="object") + except ValueError: + windowFlag = True + expected = pd.Index([None, "20"], dtype="object") + + if windowFlag: + with tm.assert_produces_warning(StrftimeErrorWarning): + result = arr.strftime("%y", "warn") + + with pytest.raises( + ValueError, match="format %y requires year >= 1900 on Windows" + ): + result = arr.strftime("%y", "raise") + + with pytest.raises( + ValueError, match="format 
%y requires year >= 1900 on Windows" + ): + result = arr[0].strftime("%y") + + result = arr.strftime("%y", "ignore") + tm.assert_index_equal(result, expected) + + else: + # "1820-01-01" should be converted properly if not on Windows. + result = arr.strftime("%y", "warn") + tm.assert_index_equal(result, expected) + + result = arr.strftime("%y", "raise") + tm.assert_index_equal(result, expected) + + result = arr.strftime("%y", "ignore") + tm.assert_index_equal(result, expected) + + arr2 = DatetimeIndex(np.array(["-0020-01-01", "2020-01-02"], "datetime64[s]")) + expected = pd.Index([None, "20"], dtype="object") + + with pytest.raises(NotImplementedError): + result = arr2.strftime("%y", "raise") + with tm.assert_produces_warning(StrftimeErrorWarning): + result = arr2.strftime("%y", "warn") + result = arr2.strftime("%y", "ignore") + tm.assert_index_equal(result, expected) + def test_strftime_nat(self): # GH 29578 arr = DatetimeIndex(["2019-01-01", NaT])._data diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index c5c4b234eb129..7fbb9752acf36 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -38,6 +38,7 @@ "PossiblePrecisionLoss", "PyperclipException", "SpecificationError", + "StrftimeErrorWarning", "UnsortedIndexError", "UnsupportedFunctionCall", "ValueLabelTypeMismatch",