Skip to content

Commit d8b3730

Browse files
author
MarcoGorelli
committed
remove infer_datetime_format
1 parent b35f16d commit d8b3730

File tree

5 files changed

+75
-118
lines changed

5 files changed

+75
-118
lines changed

pandas/core/tools/datetimes.py

+13 -2
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,10 @@
2020

2121
import numpy as np
2222

23-
from pandas._libs import tslib
23+
from pandas._libs import (
24+
lib,
25+
tslib,
26+
)
2427
from pandas._libs.tslibs import (
2528
OutOfBoundsDatetime,
2629
Timedelta,
@@ -706,7 +709,7 @@ def to_datetime(
706709
format: str | None = None,
707710
exact: bool = True,
708711
unit: str | None = None,
709-
infer_datetime_format: bool = False,
712+
infer_datetime_format: lib.NoDefault | bool = lib.no_default,
710713
origin: str = "unix",
711714
cache: bool = True,
712715
) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None:
@@ -1053,6 +1056,14 @@ def to_datetime(
10531056
'2020-01-01 18:00:00+00:00', '2020-01-01 19:00:00+00:00'],
10541057
dtype='datetime64[ns, UTC]', freq=None)
10551058
"""
1059+
if infer_datetime_format is not lib.no_default:
1060+
# Kept for compatibility with old code - TODO remove
1061+
warnings.warn(
1062+
"The argument 'infer_datetime_format' has been removed - a strict version "
1063+
"of it is now the default, see "
1064+
"https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html",
1065+
stacklevel=find_stack_level(inspect.currentframe()),
1066+
)
10561067
if arg is None:
10571068
return None
10581069

pandas/io/parsers/base_parser.py

-5
Original file line number | Diff line number | Diff line change
@@ -123,13 +123,11 @@ def __init__(self, kwds) -> None:
123123
self.true_values = kwds.get("true_values")
124124
self.false_values = kwds.get("false_values")
125125
self.mangle_dupe_cols = kwds.get("mangle_dupe_cols", True)
126-
self.infer_datetime_format = kwds.pop("infer_datetime_format", False)
127126
self.cache_dates = kwds.pop("cache_dates", True)
128127

129128
self._date_conv = _make_date_converter(
130129
date_parser=self.date_parser,
131130
dayfirst=self.dayfirst,
132-
infer_datetime_format=self.infer_datetime_format,
133131
cache_dates=self.cache_dates,
134132
)
135133

@@ -1106,7 +1104,6 @@ def _get_empty_meta(
11061104
def _make_date_converter(
11071105
date_parser=None,
11081106
dayfirst: bool = False,
1109-
infer_datetime_format: bool = False,
11101107
cache_dates: bool = True,
11111108
):
11121109
def converter(*date_cols):
@@ -1119,7 +1116,6 @@ def converter(*date_cols):
11191116
utc=None,
11201117
dayfirst=dayfirst,
11211118
errors="ignore",
1122-
infer_datetime_format=infer_datetime_format,
11231119
cache=cache_dates,
11241120
).to_numpy()
11251121

@@ -1189,7 +1185,6 @@ def converter(*date_cols):
11891185
"squeeze": None,
11901186
"compression": None,
11911187
"mangle_dupe_cols": True,
1192-
"infer_datetime_format": False,
11931188
"skip_blank_lines": True,
11941189
"encoding_errors": "strict",
11951190
"on_bad_lines": ParserBase.BadLineHandleMethod.ERROR,

pandas/io/parsers/readers.py

+18 -20
Original file line number | Diff line number | Diff line change
@@ -263,11 +263,6 @@
263263
:ref:`io.csv.mixed_timezones` for more.
264264
265265
Note: A fast-path exists for iso8601-formatted dates.
266-
infer_datetime_format : bool, default False
267-
If True and `parse_dates` is enabled, pandas will attempt to infer the
268-
format of the datetime strings in the columns, and if it can be inferred,
269-
switch to a faster method of parsing them. In some cases this can increase
270-
the parsing speed by 5-10x.
271266
keep_date_col : bool, default False
272267
If True and `parse_dates` specifies combining multiple columns then
273268
keep the original columns.
@@ -484,7 +479,6 @@
484479
"decimal",
485480
"iterator",
486481
"dayfirst",
487-
"infer_datetime_format",
488482
"verbose",
489483
"skipinitialspace",
490484
"low_memory",
@@ -649,7 +643,7 @@ def read_csv(
649643
verbose: bool = ...,
650644
skip_blank_lines: bool = ...,
651645
parse_dates: bool | Sequence[Hashable] | None = ...,
652-
infer_datetime_format: bool = ...,
646+
infer_datetime_format: bool | lib.NoDefault = ...,
653647
keep_date_col: bool = ...,
654648
date_parser=...,
655649
dayfirst: bool = ...,
@@ -710,7 +704,7 @@ def read_csv(
710704
verbose: bool = ...,
711705
skip_blank_lines: bool = ...,
712706
parse_dates: bool | Sequence[Hashable] | None = ...,
713-
infer_datetime_format: bool = ...,
707+
infer_datetime_format: bool | lib.NoDefault = ...,
714708
keep_date_col: bool = ...,
715709
date_parser=...,
716710
dayfirst: bool = ...,
@@ -771,7 +765,7 @@ def read_csv(
771765
verbose: bool = ...,
772766
skip_blank_lines: bool = ...,
773767
parse_dates: bool | Sequence[Hashable] | None = ...,
774-
infer_datetime_format: bool = ...,
768+
infer_datetime_format: bool | lib.NoDefault = ...,
775769
keep_date_col: bool = ...,
776770
date_parser=...,
777771
dayfirst: bool = ...,
@@ -832,7 +826,7 @@ def read_csv(
832826
verbose: bool = ...,
833827
skip_blank_lines: bool = ...,
834828
parse_dates: bool | Sequence[Hashable] | None = ...,
835-
infer_datetime_format: bool = ...,
829+
infer_datetime_format: bool | lib.NoDefault = ...,
836830
keep_date_col: bool = ...,
837831
date_parser=...,
838832
dayfirst: bool = ...,
@@ -906,7 +900,7 @@ def read_csv(
906900
skip_blank_lines: bool = True,
907901
# Datetime Handling
908902
parse_dates: bool | Sequence[Hashable] | None = None,
909-
infer_datetime_format: bool = False,
903+
infer_datetime_format: bool | lib.NoDefault = lib.no_default,
910904
keep_date_col: bool = False,
911905
date_parser=None,
912906
dayfirst: bool = False,
@@ -941,6 +935,14 @@ def read_csv(
941935
storage_options: StorageOptions = None,
942936
use_nullable_dtypes: bool = False,
943937
) -> DataFrame | TextFileReader:
938+
if infer_datetime_format is not lib.no_default:
939+
# Kept for compatibility with old code - TODO remove
940+
warnings.warn(
941+
"The argument 'infer_datetime_format' has been removed - a strict version "
942+
"of it is now the default, see "
943+
"https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html",
944+
stacklevel=find_stack_level(inspect.currentframe()),
945+
)
944946
# locals() should never be modified
945947
kwds = locals().copy()
946948
del kwds["filepath_or_buffer"]
@@ -993,7 +995,7 @@ def read_table(
993995
verbose: bool = ...,
994996
skip_blank_lines: bool = ...,
995997
parse_dates: bool | Sequence[Hashable] = ...,
996-
infer_datetime_format: bool = ...,
998+
infer_datetime_format: bool | lib.NoDefault = ...,
997999
keep_date_col: bool = ...,
9981000
date_parser=...,
9991001
dayfirst: bool = ...,
@@ -1054,7 +1056,7 @@ def read_table(
10541056
verbose: bool = ...,
10551057
skip_blank_lines: bool = ...,
10561058
parse_dates: bool | Sequence[Hashable] = ...,
1057-
infer_datetime_format: bool = ...,
1059+
infer_datetime_format: bool | lib.NoDefault = ...,
10581060
keep_date_col: bool = ...,
10591061
date_parser=...,
10601062
dayfirst: bool = ...,
@@ -1115,7 +1117,7 @@ def read_table(
11151117
verbose: bool = ...,
11161118
skip_blank_lines: bool = ...,
11171119
parse_dates: bool | Sequence[Hashable] = ...,
1118-
infer_datetime_format: bool = ...,
1120+
infer_datetime_format: bool | lib.NoDefault = ...,
11191121
keep_date_col: bool = ...,
11201122
date_parser=...,
11211123
dayfirst: bool = ...,
@@ -1176,7 +1178,7 @@ def read_table(
11761178
verbose: bool = ...,
11771179
skip_blank_lines: bool = ...,
11781180
parse_dates: bool | Sequence[Hashable] = ...,
1179-
infer_datetime_format: bool = ...,
1181+
infer_datetime_format: bool | lib.NoDefault = ...,
11801182
keep_date_col: bool = ...,
11811183
date_parser=...,
11821184
dayfirst: bool = ...,
@@ -1250,7 +1252,7 @@ def read_table(
12501252
skip_blank_lines: bool = True,
12511253
# Datetime Handling
12521254
parse_dates: bool | Sequence[Hashable] = False,
1253-
infer_datetime_format: bool = False,
1255+
infer_datetime_format: bool | lib.NoDefault = lib.no_default,
12541256
keep_date_col: bool = False,
12551257
date_parser=None,
12561258
dayfirst: bool = False,
@@ -1888,10 +1890,6 @@ def TextParser(*args, **kwds) -> TextFileReader:
18881890
Encoding to use for UTF when reading/writing (ex. 'utf-8')
18891891
squeeze : bool, default False
18901892
returns Series if only one column.
1891-
infer_datetime_format: bool, default False
1892-
If True and `parse_dates` is True for a column, try to infer the
1893-
datetime format based on the first datetime string. If the format
1894-
can be inferred, there often will be a large parsing speed-up.
18951893
float_precision : str, optional
18961894
Specifies which converter the C engine should use for floating-point
18971895
values. The options are `None` or `high` for the ordinary converter,

pandas/tests/io/parser/test_parse_dates.py

+13 -2
Original file line number | Diff line number | Diff line change
@@ -1260,7 +1260,6 @@ def test_bad_date_parse(all_parsers, cache_dates, value):
12601260
header=None,
12611261
names=["foo", "bar"],
12621262
parse_dates=["foo"],
1263-
infer_datetime_format=False,
12641263
cache_dates=cache_dates,
12651264
)
12661265

@@ -1288,7 +1287,6 @@ def test_bad_date_parse_with_warning(all_parsers, cache_dates, value):
12881287
header=None,
12891288
names=["foo", "bar"],
12901289
parse_dates=["foo"],
1291-
infer_datetime_format=False,
12921290
cache_dates=cache_dates,
12931291
)
12941292

@@ -1306,6 +1304,19 @@ def test_parse_dates_empty_string(all_parsers):
13061304
tm.assert_frame_equal(result, expected)
13071305

13081306

1307+
def test_parse_dates_infer_datetime_format_warning(all_parsers):
1308+
# GH 49024
1309+
parser = all_parsers
1310+
data = "Date,test\n2012-01-01,1\n,2"
1311+
parser.read_csv_check_warnings(
1312+
UserWarning,
1313+
"The argument 'infer_datetime_format' has been removed",
1314+
StringIO(data),
1315+
parse_dates=["Date"],
1316+
infer_datetime_format=True,
1317+
)
1318+
1319+
13091320
@xfail_pyarrow
13101321
@pytest.mark.parametrize(
13111322
"data,kwargs,expected",

0 commit comments

Comments
 (0)