Skip to content

Commit 1104a92

Browse files
author
MarcoGorelli
committed
🗑️ deprecate infer_datetime_format, make strict
1 parent 75429df commit 1104a92

File tree

3 files changed

+53
-83
lines changed

3 files changed

+53
-83
lines changed

pandas/core/tools/datetimes.py

+34-58
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@
1919

2020
import numpy as np
2121

22-
from pandas._libs import tslib
22+
from pandas._libs import (
23+
lib,
24+
tslib,
25+
)
2326
from pandas._libs.tslibs import (
2427
OutOfBoundsDatetime,
2528
Timedelta,
@@ -331,7 +334,6 @@ def _convert_listlike_datetimes(
331334
tz: Timezone | None = None,
332335
unit: str | None = None,
333336
errors: DateTimeErrorChoices = "raise",
334-
infer_datetime_format: bool = False,
335337
dayfirst: bool | None = None,
336338
yearfirst: bool | None = None,
337339
exact: bool = True,
@@ -415,27 +417,19 @@ def _convert_listlike_datetimes(
415417
arg = ensure_object(arg)
416418
require_iso8601 = False
417419

418-
if infer_datetime_format and format is None:
420+
if format is None:
419421
format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)
420422

423+
# There is a special fast-path for iso8601 formatted
424+
# datetime strings, so in those cases don't use the inferred
425+
# format because using it makes parsing slower in this
426+
# special case
427+
if format is not None and format_is_iso(format):
428+
require_iso8601 = True
429+
format = None
421430
if format is not None:
422-
# There is a special fast-path for iso8601 formatted
423-
# datetime strings, so in those cases don't use the inferred
424-
# format because this path makes process slower in this
425-
# special case
426-
format_is_iso8601 = format_is_iso(format)
427-
if format_is_iso8601:
428-
require_iso8601 = not infer_datetime_format
429-
format = None
430-
431-
if format is not None:
432-
res = _to_datetime_with_format(
433-
arg, orig_arg, name, tz, format, exact, errors, infer_datetime_format
434-
)
435-
if res is not None:
436-
return res
431+
return _to_datetime_with_format(arg, orig_arg, name, tz, format, exact, errors)
437432

438-
assert format is None or infer_datetime_format
439433
utc = tz == "utc"
440434
result, tz_parsed = objects_to_datetime64ns(
441435
arg,
@@ -464,8 +458,7 @@ def _array_strptime_with_fallback(
464458
fmt: str,
465459
exact: bool,
466460
errors: str,
467-
infer_datetime_format: bool,
468-
) -> Index | None:
461+
) -> Index:
469462
"""
470463
Call array_strptime, with fallback behavior depending on 'errors'.
471464
"""
@@ -486,18 +479,14 @@ def _array_strptime_with_fallback(
486479
# if fmt was inferred, try falling back
487480
# to array_to_datetime - terminate here
488481
# for specified formats
489-
if not infer_datetime_format:
490-
if errors == "raise":
491-
raise
492-
elif errors == "coerce":
493-
result = np.empty(arg.shape, dtype="M8[ns]")
494-
iresult = result.view("i8")
495-
iresult.fill(iNaT)
496-
else:
497-
result = arg
482+
if errors == "raise":
483+
raise
484+
elif errors == "coerce":
485+
result = np.empty(arg.shape, dtype="M8[ns]")
486+
iresult = result.view("i8")
487+
iresult.fill(iNaT)
498488
else:
499-
# Indicates to the caller to fallback to objects_to_datetime64ns
500-
return None
489+
result = arg
501490
else:
502491
if "%Z" in fmt or "%z" in fmt:
503492
return _return_parsed_timezone_results(result, timezones, tz, name)
@@ -513,10 +502,9 @@ def _to_datetime_with_format(
513502
fmt: str,
514503
exact: bool,
515504
errors: str,
516-
infer_datetime_format: bool,
517-
) -> Index | None:
505+
) -> Index:
518506
"""
519-
Try parsing with the given format, returning None on failure.
507+
Try parsing with the given format.
520508
"""
521509
result = None
522510

@@ -537,9 +525,7 @@ def _to_datetime_with_format(
537525
return _box_as_indexlike(result, utc=utc, name=name)
538526

539527
# fallback
540-
res = _array_strptime_with_fallback(
541-
arg, name, tz, fmt, exact, errors, infer_datetime_format
542-
)
528+
res = _array_strptime_with_fallback(arg, name, tz, fmt, exact, errors)
543529
return res
544530

545531

@@ -713,7 +699,7 @@ def to_datetime(
713699
format: str | None = None,
714700
exact: bool = True,
715701
unit: str | None = None,
716-
infer_datetime_format: bool = False,
702+
infer_datetime_format: lib.NoDefault | bool = lib.no_default,
717703
origin: str = "unix",
718704
cache: bool = True,
719705
) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None:
@@ -926,24 +912,6 @@ def to_datetime(
926912
1 2016-03-05
927913
dtype: datetime64[ns]
928914
929-
Passing ``infer_datetime_format=True`` can often-times speedup a parsing
930-
if its not an ISO8601 format exactly, but in a regular format.
931-
932-
>>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000)
933-
>>> s.head()
934-
0 3/11/2000
935-
1 3/12/2000
936-
2 3/13/2000
937-
3 3/11/2000
938-
4 3/12/2000
939-
dtype: object
940-
941-
>>> %timeit pd.to_datetime(s, infer_datetime_format=True) # doctest: +SKIP
942-
100 loops, best of 3: 10.4 ms per loop
943-
944-
>>> %timeit pd.to_datetime(s, infer_datetime_format=False) # doctest: +SKIP
945-
1 loop, best of 3: 471 ms per loop
946-
947915
Using a unix epoch time
948916
949917
>>> pd.to_datetime(1490195805, unit='s')
@@ -1060,6 +1028,15 @@ def to_datetime(
10601028
'2020-01-01 18:00:00+00:00', '2020-01-01 19:00:00+00:00'],
10611029
dtype='datetime64[ns, UTC]', freq=None)
10621030
"""
1031+
if infer_datetime_format is not lib.no_default:
1032+
warnings.warn(
1033+
"The argument 'infer_datetime_format' is deprecated and will "
1034+
"be removed in a future version. "
1035+
"A strict version of it is now the default, see "
1036+
"https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. "
1037+
"You can safely remove this argument.",
1038+
stacklevel=find_stack_level(),
1039+
)
10631040
if arg is None:
10641041
return None
10651042

@@ -1075,7 +1052,6 @@ def to_datetime(
10751052
yearfirst=yearfirst,
10761053
errors=errors,
10771054
exact=exact,
1078-
infer_datetime_format=infer_datetime_format,
10791055
)
10801056

10811057
result: Timestamp | NaTType | Series | Index

pandas/io/parsers/base_parser.py

-5
Original file line numberDiff line numberDiff line change
@@ -122,13 +122,11 @@ def __init__(self, kwds) -> None:
122122
self.true_values = kwds.get("true_values")
123123
self.false_values = kwds.get("false_values")
124124
self.mangle_dupe_cols = kwds.get("mangle_dupe_cols", True)
125-
self.infer_datetime_format = kwds.pop("infer_datetime_format", False)
126125
self.cache_dates = kwds.pop("cache_dates", True)
127126

128127
self._date_conv = _make_date_converter(
129128
date_parser=self.date_parser,
130129
dayfirst=self.dayfirst,
131-
infer_datetime_format=self.infer_datetime_format,
132130
cache_dates=self.cache_dates,
133131
)
134132

@@ -1105,7 +1103,6 @@ def _get_empty_meta(
11051103
def _make_date_converter(
11061104
date_parser=None,
11071105
dayfirst: bool = False,
1108-
infer_datetime_format: bool = False,
11091106
cache_dates: bool = True,
11101107
):
11111108
def converter(*date_cols):
@@ -1118,7 +1115,6 @@ def converter(*date_cols):
11181115
utc=None,
11191116
dayfirst=dayfirst,
11201117
errors="ignore",
1121-
infer_datetime_format=infer_datetime_format,
11221118
cache=cache_dates,
11231119
).to_numpy()
11241120

@@ -1188,7 +1184,6 @@ def converter(*date_cols):
11881184
"squeeze": None,
11891185
"compression": None,
11901186
"mangle_dupe_cols": True,
1191-
"infer_datetime_format": False,
11921187
"skip_blank_lines": True,
11931188
"encoding_errors": "strict",
11941189
"on_bad_lines": ParserBase.BadLineHandleMethod.ERROR,

pandas/io/parsers/readers.py

+19-20
Original file line numberDiff line numberDiff line change
@@ -262,11 +262,6 @@
262262
:ref:`io.csv.mixed_timezones` for more.
263263
264264
Note: A fast-path exists for iso8601-formatted dates.
265-
infer_datetime_format : bool, default False
266-
If True and `parse_dates` is enabled, pandas will attempt to infer the
267-
format of the datetime strings in the columns, and if it can be inferred,
268-
switch to a faster method of parsing them. In some cases this can increase
269-
the parsing speed by 5-10x.
270265
keep_date_col : bool, default False
271266
If True and `parse_dates` specifies combining multiple columns then
272267
keep the original columns.
@@ -483,7 +478,6 @@
483478
"decimal",
484479
"iterator",
485480
"dayfirst",
486-
"infer_datetime_format",
487481
"verbose",
488482
"skipinitialspace",
489483
"low_memory",
@@ -648,7 +642,7 @@ def read_csv(
648642
verbose: bool = ...,
649643
skip_blank_lines: bool = ...,
650644
parse_dates: bool | Sequence[Hashable] | None = ...,
651-
infer_datetime_format: bool = ...,
645+
infer_datetime_format: bool | lib.NoDefault = ...,
652646
keep_date_col: bool = ...,
653647
date_parser=...,
654648
dayfirst: bool = ...,
@@ -709,7 +703,7 @@ def read_csv(
709703
verbose: bool = ...,
710704
skip_blank_lines: bool = ...,
711705
parse_dates: bool | Sequence[Hashable] | None = ...,
712-
infer_datetime_format: bool = ...,
706+
infer_datetime_format: bool | lib.NoDefault = ...,
713707
keep_date_col: bool = ...,
714708
date_parser=...,
715709
dayfirst: bool = ...,
@@ -770,7 +764,7 @@ def read_csv(
770764
verbose: bool = ...,
771765
skip_blank_lines: bool = ...,
772766
parse_dates: bool | Sequence[Hashable] | None = ...,
773-
infer_datetime_format: bool = ...,
767+
infer_datetime_format: bool | lib.NoDefault = ...,
774768
keep_date_col: bool = ...,
775769
date_parser=...,
776770
dayfirst: bool = ...,
@@ -831,7 +825,7 @@ def read_csv(
831825
verbose: bool = ...,
832826
skip_blank_lines: bool = ...,
833827
parse_dates: bool | Sequence[Hashable] | None = ...,
834-
infer_datetime_format: bool = ...,
828+
infer_datetime_format: bool | lib.NoDefault = ...,
835829
keep_date_col: bool = ...,
836830
date_parser=...,
837831
dayfirst: bool = ...,
@@ -905,7 +899,7 @@ def read_csv(
905899
skip_blank_lines: bool = True,
906900
# Datetime Handling
907901
parse_dates: bool | Sequence[Hashable] | None = None,
908-
infer_datetime_format: bool = False,
902+
infer_datetime_format: bool | lib.NoDefault = lib.no_default,
909903
keep_date_col: bool = False,
910904
date_parser=None,
911905
dayfirst: bool = False,
@@ -940,6 +934,15 @@ def read_csv(
940934
storage_options: StorageOptions = None,
941935
use_nullable_dtypes: bool = False,
942936
) -> DataFrame | TextFileReader:
937+
if infer_datetime_format is not lib.no_default:
938+
warnings.warn(
939+
"The argument 'infer_datetime_format' is deprecated and will "
940+
"be removed in a future version. "
941+
"A strict version of it is now the default, see "
942+
"https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html. "
943+
"You can safely remove this argument.",
944+
stacklevel=find_stack_level(),
945+
)
943946
# locals() should never be modified
944947
kwds = locals().copy()
945948
del kwds["filepath_or_buffer"]
@@ -992,7 +995,7 @@ def read_table(
992995
verbose: bool = ...,
993996
skip_blank_lines: bool = ...,
994997
parse_dates: bool | Sequence[Hashable] = ...,
995-
infer_datetime_format: bool = ...,
998+
infer_datetime_format: bool | lib.NoDefault = ...,
996999
keep_date_col: bool = ...,
9971000
date_parser=...,
9981001
dayfirst: bool = ...,
@@ -1053,7 +1056,7 @@ def read_table(
10531056
verbose: bool = ...,
10541057
skip_blank_lines: bool = ...,
10551058
parse_dates: bool | Sequence[Hashable] = ...,
1056-
infer_datetime_format: bool = ...,
1059+
infer_datetime_format: bool | lib.NoDefault = ...,
10571060
keep_date_col: bool = ...,
10581061
date_parser=...,
10591062
dayfirst: bool = ...,
@@ -1114,7 +1117,7 @@ def read_table(
11141117
verbose: bool = ...,
11151118
skip_blank_lines: bool = ...,
11161119
parse_dates: bool | Sequence[Hashable] = ...,
1117-
infer_datetime_format: bool = ...,
1120+
infer_datetime_format: bool | lib.NoDefault = ...,
11181121
keep_date_col: bool = ...,
11191122
date_parser=...,
11201123
dayfirst: bool = ...,
@@ -1175,7 +1178,7 @@ def read_table(
11751178
verbose: bool = ...,
11761179
skip_blank_lines: bool = ...,
11771180
parse_dates: bool | Sequence[Hashable] = ...,
1178-
infer_datetime_format: bool = ...,
1181+
infer_datetime_format: bool | lib.NoDefault = ...,
11791182
keep_date_col: bool = ...,
11801183
date_parser=...,
11811184
dayfirst: bool = ...,
@@ -1249,7 +1252,7 @@ def read_table(
12491252
skip_blank_lines: bool = True,
12501253
# Datetime Handling
12511254
parse_dates: bool | Sequence[Hashable] = False,
1252-
infer_datetime_format: bool = False,
1255+
infer_datetime_format: bool | lib.NoDefault = lib.no_default,
12531256
keep_date_col: bool = False,
12541257
date_parser=None,
12551258
dayfirst: bool = False,
@@ -1883,10 +1886,6 @@ def TextParser(*args, **kwds) -> TextFileReader:
18831886
Encoding to use for UTF when reading/writing (ex. 'utf-8')
18841887
squeeze : bool, default False
18851888
returns Series if only one column.
1886-
infer_datetime_format: bool, default False
1887-
If True and `parse_dates` is True for a column, try to infer the
1888-
datetime format based on the first datetime string. If the format
1889-
can be inferred, there often will be a large parsing speed-up.
18901889
float_precision : str, optional
18911890
Specifies which converter the C engine should use for floating-point
18921891
values. The options are `None` or `high` for the ordinary converter,

0 commit comments

Comments
 (0)