Skip to content

Remove support for errors="ignore" in to_datetime, to_timedelta and to_numeric #57361

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into the base branch on
Feb 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 11 additions & 27 deletions asv_bench/benchmarks/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,20 @@


class ToNumeric:
params = ["ignore", "coerce"]
param_names = ["errors"]

def setup(self, errors):
def setup(self):
N = 10000
self.float = Series(np.random.randn(N))
self.numstr = self.float.astype("str")
self.str = Series(Index([f"i-{i}" for i in range(N)], dtype=object))

def time_from_float(self, errors):
to_numeric(self.float, errors=errors)
def time_from_float(self):
to_numeric(self.float, errors="coerce")

def time_from_numeric_str(self, errors):
to_numeric(self.numstr, errors=errors)
def time_from_numeric_str(self):
to_numeric(self.numstr, errors="coerce")

def time_from_str(self, errors):
to_numeric(self.str, errors=errors)
def time_from_str(self):
to_numeric(self.str, errors="coerce")


class ToNumericDowncast:
Expand Down Expand Up @@ -187,7 +184,7 @@ def time_iso8601_tz_spaceformat(self):

def time_iso8601_infer_zero_tz_fromat(self):
# GH 41047
to_datetime(self.strings_zero_tz, infer_datetime_format=True)
to_datetime(self.strings_zero_tz)


class ToDatetimeNONISO8601:
Expand Down Expand Up @@ -271,16 +268,6 @@ def time_dup_string_tzoffset_dates(self, cache):
to_datetime(self.dup_string_with_tz, cache=cache)


# GH 43901
class ToDatetimeInferDatetimeFormat:
def setup(self):
rng = date_range(start="1/1/2000", periods=100000, freq="h")
self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()

def time_infer_datetime_format(self):
to_datetime(self.strings, infer_datetime_format=True)


class ToTimedelta:
def setup(self):
self.ints = np.random.randint(0, 60, size=10000)
Expand All @@ -301,16 +288,13 @@ def time_convert_string_seconds(self):


class ToTimedeltaErrors:
params = ["coerce", "ignore"]
param_names = ["errors"]

def setup(self, errors):
def setup(self):
ints = np.random.randint(0, 60, size=10000)
self.arr = [f"{i} days" for i in ints]
self.arr[-1] = "apple"

def time_convert(self, errors):
to_timedelta(self.arr, errors=errors)
def time_convert(self):
to_timedelta(self.arr, errors="coerce")


from .pandas_vb_common import setup # noqa: F401 isort:skip
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ Removal of prior version deprecations/changes
- Removed ``read_gbq`` and ``DataFrame.to_gbq``. Use ``pandas_gbq.read_gbq`` and ``pandas_gbq.to_gbq`` instead https://pandas-gbq.readthedocs.io/en/latest/api.html (:issue:`55525`)
- Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`)
- Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`)
- Removed support for ``errors="ignore"`` in :func:`to_datetime`, :func:`to_timedelta` and :func:`to_numeric` (:issue:`55734`)
- Removed the ``ArrayManager`` (:issue:`55043`)
- Removed the ``fastpath`` argument from the :class:`Series` constructor (:issue:`55466`)
- Removed the ``is_boolean``, ``is_integer``, ``is_floating``, ``holds_integer``, ``is_numeric``, ``is_categorical``, ``is_object``, and ``is_interval`` attributes of :class:`Index` (:issue:`50042`)
Expand Down
14 changes: 3 additions & 11 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -272,14 +272,13 @@ def array_with_unit_to_datetime(
"""
cdef:
Py_ssize_t i, n=len(values)
bint is_ignore = errors == "ignore"
bint is_coerce = errors == "coerce"
bint is_raise = errors == "raise"
ndarray[int64_t] iresult
tzinfo tz = None
float fval

assert is_ignore or is_coerce or is_raise
assert is_coerce or is_raise

if unit == "ns":
result, tz = array_to_datetime(
Expand Down Expand Up @@ -342,11 +341,6 @@ def array_with_unit_to_datetime(
if is_raise:
err.args = (f"{err}, at position {i}",)
raise
elif is_ignore:
# we have hit an exception
# and are in ignore mode
# redo as object
return _array_with_unit_to_datetime_object_fallback(values, unit)
else:
# is_coerce
iresult[i] = NPY_NAT
Expand Down Expand Up @@ -461,7 +455,6 @@ cpdef array_to_datetime(
bint utc_convert = bool(utc)
bint seen_datetime_offset = False
bint is_raise = errors == "raise"
bint is_ignore = errors == "ignore"
bint is_coerce = errors == "coerce"
bint is_same_offsets
_TSObject tsobj
Expand All @@ -475,7 +468,7 @@ cpdef array_to_datetime(
str abbrev

# specify error conditions
assert is_raise or is_ignore or is_coerce
assert is_raise or is_coerce

if infer_reso:
abbrev = "ns"
Expand Down Expand Up @@ -687,7 +680,6 @@ cdef _array_to_datetime_object(
cdef:
Py_ssize_t i, n = values.size
object val
bint is_ignore = errors == "ignore"
bint is_coerce = errors == "coerce"
bint is_raise = errors == "raise"
ndarray oresult_nd
Expand All @@ -696,7 +688,7 @@ cdef _array_to_datetime_object(
cnp.broadcast mi
_TSObject tsobj

assert is_raise or is_ignore or is_coerce
assert is_raise or is_coerce

oresult_nd = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
mi = cnp.PyArray_MultiIterNew2(oresult_nd, values)
Expand Down
11 changes: 4 additions & 7 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def array_strptime(
values : ndarray of string-like objects
fmt : string-like regex
exact : matches must be exact if True, search if False
errors : string specifying error handling, {'raise', 'ignore', 'coerce'}
errors : string specifying error handling, {'raise', 'coerce'}
creso : NPY_DATETIMEUNIT, default NPY_FR_ns
Set to NPY_FR_GENERIC to infer a resolution.
"""
Expand All @@ -322,7 +322,6 @@ def array_strptime(
object val
bint seen_datetime_offset = False
bint is_raise = errors=="raise"
bint is_ignore = errors=="ignore"
bint is_coerce = errors=="coerce"
bint is_same_offsets
set out_tzoffset_vals = set()
Expand All @@ -334,7 +333,7 @@ def array_strptime(
bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
DatetimeParseState state = DatetimeParseState(creso)

assert is_raise or is_ignore or is_coerce
assert is_raise or is_coerce

_validate_fmt(fmt)
format_regex, locale_time = _get_format_regex(fmt)
Expand Down Expand Up @@ -806,14 +805,13 @@ def _array_strptime_object_fallback(
object val
tzinfo tz
bint is_raise = errors=="raise"
bint is_ignore = errors=="ignore"
bint is_coerce = errors=="coerce"
bint iso_format = format_is_iso(fmt)
NPY_DATETIMEUNIT creso, out_bestunit, item_reso
int out_local = 0, out_tzoffset = 0
bint string_to_dts_succeeded = 0

assert is_raise or is_ignore or is_coerce
assert is_raise or is_coerce

item_reso = NPY_DATETIMEUNIT.NPY_FR_GENERIC
format_regex, locale_time = _get_format_regex(fmt)
Expand Down Expand Up @@ -922,9 +920,8 @@ def _array_strptime_object_fallback(
if is_coerce:
result[i] = NaT
continue
elif is_raise:
else:
raise
return values

import warnings

Expand Down
2 changes: 1 addition & 1 deletion pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ def closed(self) -> bool:

# datetime and NaTType
DatetimeNaTType = Union[datetime, "NaTType"]
DateTimeErrorChoices = Union[IgnoreRaise, Literal["coerce"]]
DateTimeErrorChoices = Literal["raise", "coerce"]

# sort_index
SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"]
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2394,7 +2394,7 @@ def objects_to_datetime64(
yearfirst : bool
utc : bool, default False
Whether to convert/localize timestamps to UTC.
errors : {'raise', 'ignore', 'coerce'}
errors : {'raise', 'coerce'}
allow_object : bool
Whether to return an object-dtype ndarray instead of raising if the
data contains more than one timezone.
Expand All @@ -2414,7 +2414,7 @@ def objects_to_datetime64(
ValueError : if data cannot be converted to datetimes
TypeError : When a type cannot be converted to datetime
"""
assert errors in ["raise", "ignore", "coerce"]
assert errors in ["raise", "coerce"]

# if str-dtype, convert
data = np.array(data, copy=False, dtype=np.object_)
Expand Down
52 changes: 4 additions & 48 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def _convert_listlike_datetimes(
unit : str
None or string of the frequency of the passed data
errors : str
error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
error handling behaviors from to_datetime, 'raise', 'coerce'
dayfirst : bool
dayfirst parsing behavior from to_datetime
yearfirst : bool
Expand Down Expand Up @@ -387,7 +387,6 @@ def _convert_listlike_datetimes(
if not is_supported_dtype(arg_dtype):
# We go to closest supported reso, i.e. "s"
arg = astype_overflowsafe(
# TODO: looks like we incorrectly raise with errors=="ignore"
np.asarray(arg),
np.dtype("M8[s]"),
is_coerce=errors == "coerce",
Expand Down Expand Up @@ -418,9 +417,6 @@ def _convert_listlike_datetimes(
if errors == "coerce":
npvalues = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
return DatetimeIndex(npvalues, name=name)
elif errors == "ignore":
idx = Index(arg, name=name)
return idx
raise

arg = ensure_object(arg)
Expand Down Expand Up @@ -525,12 +521,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
arg = arg.astype(object, copy=False)
arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)

if errors == "ignore":
# Index constructor _may_ infer to DatetimeIndex
result = Index._with_infer(arr, name=name)
else:
result = DatetimeIndex(arr, name=name)

result = DatetimeIndex(arr, name=name)
if not isinstance(result, DatetimeIndex):
return result

Expand Down Expand Up @@ -629,7 +620,6 @@ def to_datetime(
format: str | None = ...,
exact: bool = ...,
unit: str | None = ...,
infer_datetime_format: bool = ...,
origin=...,
cache: bool = ...,
) -> Timestamp:
Expand All @@ -646,7 +636,6 @@ def to_datetime(
format: str | None = ...,
exact: bool = ...,
unit: str | None = ...,
infer_datetime_format: bool = ...,
origin=...,
cache: bool = ...,
) -> Series:
Expand All @@ -663,7 +652,6 @@ def to_datetime(
format: str | None = ...,
exact: bool = ...,
unit: str | None = ...,
infer_datetime_format: bool = ...,
origin=...,
cache: bool = ...,
) -> DatetimeIndex:
Expand All @@ -679,7 +667,6 @@ def to_datetime(
format: str | None = None,
exact: bool | lib.NoDefault = lib.no_default,
unit: str | None = None,
infer_datetime_format: lib.NoDefault | bool = lib.no_default,
origin: str = "unix",
cache: bool = True,
) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None:
Expand All @@ -696,10 +683,9 @@ def to_datetime(
method expects minimally the following columns: :const:`"year"`,
:const:`"month"`, :const:`"day"`. The column "year"
must be specified in 4-digit format.
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
errors : {'raise', 'coerce'}, default 'raise'
- If :const:`'raise'`, then invalid parsing will raise an exception.
- If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`.
- If :const:`'ignore'`, then invalid parsing will return the input.
dayfirst : bool, default False
Specify a date parse order if `arg` is str or is list-like.
If :const:`True`, parses dates with the day first, e.g. :const:`"10/11/12"`
Expand Down Expand Up @@ -780,16 +766,6 @@ def to_datetime(
integer or float number. This will be based off the origin.
Example, with ``unit='ms'`` and ``origin='unix'``, this would calculate
the number of milliseconds to the unix epoch start.
infer_datetime_format : bool, default False
If :const:`True` and no `format` is given, attempt to infer the format
of the datetime strings based on the first non-NaN element,
and if it can be inferred, switch to a faster method of parsing them.
In some cases this can increase the parsing speed by ~5-10x.

.. deprecated:: 2.0.0
A strict version of this argument is now the default, passing it has
no effect.

origin : scalar, default 'unix'
Define the reference date. The numeric values would be parsed as number
of units (defined by `unit`) since this reference date.
Expand Down Expand Up @@ -1012,25 +988,6 @@ def to_datetime(
"""
if exact is not lib.no_default and format in {"mixed", "ISO8601"}:
raise ValueError("Cannot use 'exact' when 'format' is 'mixed' or 'ISO8601'")
if infer_datetime_format is not lib.no_default:
warnings.warn(
"The argument 'infer_datetime_format' is deprecated and will "
"be removed in a future version. "
"A strict version of it is now the default, see "
"https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. "
"You can safely remove this argument.",
stacklevel=find_stack_level(),
)
if errors == "ignore":
# GH#54467
warnings.warn(
"errors='ignore' is deprecated and will raise in a future version. "
"Use to_datetime without passing `errors` and catch exceptions "
"explicitly instead",
FutureWarning,
stacklevel=find_stack_level(),
)

if arg is None:
return None

Expand Down Expand Up @@ -1141,11 +1098,10 @@ def _assemble_from_unit_mappings(
Parameters
----------
arg : DataFrame
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
errors : {'raise', 'coerce'}, default 'raise'

- If :const:`'raise'`, then invalid parsing will raise an exception
- If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`
- If :const:`'ignore'`, then invalid parsing will return the input
utc : bool
Whether to convert/localize timestamps to UTC.

Expand Down
Loading