Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit f2d9eb9

Browse files
author
MarcoGorelli
committedJan 27, 2023
wip
1 parent 01693d6 commit f2d9eb9

File tree

5 files changed

+148
-25
lines changed

5 files changed

+148
-25
lines changed
 

‎doc/source/user_guide/io.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,16 @@ date_parser : function, default ``None``
290290
values from the columns defined by parse_dates into a single array and pass
291291
that; and 3) call date_parser once for each row using one or more strings
292292
(corresponding to the columns defined by parse_dates) as arguments.
293+
294+
.. deprecated:: 2.0.0
295+
Use ``date_format`` instead, or read in as ``object`` and then apply
296+
:func:`to_datetime` as needed.
297+
date_format : str, default ``None``
298+
If used in conjunction with ``parse_dates``, will parse dates according to this
299+
format. For anything more complex (e.g. different formats for different columns),
300+
please read in as ``object`` and then apply :func:`to_datetime` as needed.
301+
302+
.. versionadded:: 2.0.0
293303
dayfirst : boolean, default ``False``
294304
DD/MM format dates, international and European format.
295305
cache_dates : boolean, default True

‎pandas/io/excel/_base.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,16 @@
250250
and pass that; and 3) call `date_parser` once for each row using one or
251251
more strings (corresponding to the columns defined by `parse_dates`) as
252252
arguments.
253+
254+
.. deprecated:: 2.0.0
255+
Use ``date_format`` instead, or read in as ``object`` and then apply
256+
:func:`to_datetime` as needed.
257+
date_format : str, default ``None``
258+
If used in conjunction with ``parse_dates``, will parse dates according to this
259+
format. For anything more complex (e.g. different formats for different columns),
260+
please read in as ``object`` and then apply :func:`to_datetime` as needed.
261+
262+
.. versionadded:: 2.0.0
253263
thousands : str, default None
254264
Thousands separator for parsing string columns to numeric. Note that
255265
this parameter is only necessary for columns stored as TEXT in Excel,

‎pandas/io/parsers/base_parser.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ def __init__(self, kwds) -> None:
116116
self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False))
117117
self._parse_date_cols: Iterable = []
118118
self.date_parser = kwds.pop("date_parser", None)
119+
self.date_format = kwds.pop("date_format", None)
119120
self.dayfirst = kwds.pop("dayfirst", False)
120121
self.keep_date_col = kwds.pop("keep_date_col", False)
121122

@@ -134,6 +135,7 @@ def __init__(self, kwds) -> None:
134135

135136
self._date_conv = _make_date_converter(
136137
date_parser=self.date_parser,
138+
date_format=self.date_format,
137139
dayfirst=self.dayfirst,
138140
cache_dates=self.cache_dates,
139141
)
@@ -1092,13 +1094,15 @@ def _make_date_converter(
10921094
date_parser=None,
10931095
dayfirst: bool = False,
10941096
cache_dates: bool = True,
1097+
date_format=None,
10951098
):
10961099
def converter(*date_cols):
10971100
if date_parser is None:
10981101
strs = parsing.concat_date_cols(date_cols)
10991102

11001103
return tools.to_datetime(
11011104
ensure_object(strs),
1105+
format=date_format,
11021106
utc=False,
11031107
dayfirst=dayfirst,
11041108
errors="ignore",
@@ -1153,6 +1157,7 @@ def converter(*date_cols):
11531157
"keep_date_col": False,
11541158
"dayfirst": False,
11551159
"date_parser": None,
1160+
"date_format": None,
11561161
"usecols": None,
11571162
# 'iterator': False,
11581163
"chunksize": None,

‎pandas/io/parsers/readers.py

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -239,10 +239,7 @@
239239
say because of an unparsable value or a mixture of timezones, the column
240240
or index will be returned unaltered as an object data type. For
241241
non-standard datetime parsing, use ``pd.to_datetime`` after
242-
``pd.read_csv``. To parse an index or column with a mixture of timezones,
243-
specify ``date_parser`` to be a partially-applied
244-
:func:`pandas.to_datetime` with ``utc=True``. See
245-
:ref:`io.csv.mixed_timezones` for more.
242+
``pd.read_csv``.
246243
247244
Note: A fast-path exists for iso8601-formatted dates.
248245
infer_datetime_format : bool, default False
@@ -267,6 +264,16 @@
267264
and pass that; and 3) call `date_parser` once for each row using one or
268265
more strings (corresponding to the columns defined by `parse_dates`) as
269266
arguments.
267+
268+
.. deprecated:: 2.0.0
269+
Use ``date_format`` instead, or read in as ``object`` and then apply
270+
:func:`to_datetime` as needed.
271+
date_format : str, default ``None``
272+
If used in conjunction with ``parse_dates``, will parse dates according to this
273+
format. For anything more complex (e.g. different formats for different columns),
274+
please read in as ``object`` and then apply :func:`to_datetime` as needed.
275+
276+
.. versionadded:: 2.0.0
270277
dayfirst : bool, default False
271278
DD/MM format dates, international and European format.
272279
cache_dates : bool, default True
@@ -546,7 +553,7 @@ def _read(
546553
# if we pass a date_parser and parse_dates=False, we should not parse the
547554
# dates GH#44366
548555
if kwds.get("parse_dates", None) is None:
549-
if kwds.get("date_parser", None) is None:
556+
if kwds.get("date_parser", None) is None and kwds.get("date_format") is None:
550557
kwds["parse_dates"] = False
551558
else:
552559
kwds["parse_dates"] = True
@@ -620,6 +627,7 @@ def read_csv(
620627
infer_datetime_format: bool | lib.NoDefault = ...,
621628
keep_date_col: bool = ...,
622629
date_parser=...,
630+
date_format: str | None = ...,
623631
dayfirst: bool = ...,
624632
cache_dates: bool = ...,
625633
iterator: Literal[True],
@@ -676,6 +684,7 @@ def read_csv(
676684
infer_datetime_format: bool | lib.NoDefault = ...,
677685
keep_date_col: bool = ...,
678686
date_parser=...,
687+
date_format: str | None = ...,
679688
dayfirst: bool = ...,
680689
cache_dates: bool = ...,
681690
iterator: bool = ...,
@@ -732,6 +741,7 @@ def read_csv(
732741
infer_datetime_format: bool | lib.NoDefault = ...,
733742
keep_date_col: bool = ...,
734743
date_parser=...,
744+
date_format: str | None = ...,
735745
dayfirst: bool = ...,
736746
cache_dates: bool = ...,
737747
iterator: Literal[False] = ...,
@@ -788,6 +798,7 @@ def read_csv(
788798
infer_datetime_format: bool | lib.NoDefault = ...,
789799
keep_date_col: bool = ...,
790800
date_parser=...,
801+
date_format: str | None = ...,
791802
dayfirst: bool = ...,
792803
cache_dates: bool = ...,
793804
iterator: bool = ...,
@@ -856,6 +867,7 @@ def read_csv(
856867
infer_datetime_format: bool | lib.NoDefault = lib.no_default,
857868
keep_date_col: bool = False,
858869
date_parser=None,
870+
date_format: str | None = None,
859871
dayfirst: bool = False,
860872
cache_dates: bool = True,
861873
# Iteration
@@ -943,6 +955,7 @@ def read_table(
943955
infer_datetime_format: bool | lib.NoDefault = ...,
944956
keep_date_col: bool = ...,
945957
date_parser=...,
958+
date_format: str | None = ...,
946959
dayfirst: bool = ...,
947960
cache_dates: bool = ...,
948961
iterator: Literal[True],
@@ -999,6 +1012,7 @@ def read_table(
9991012
infer_datetime_format: bool | lib.NoDefault = ...,
10001013
keep_date_col: bool = ...,
10011014
date_parser=...,
1015+
date_format: str | None = ...,
10021016
dayfirst: bool = ...,
10031017
cache_dates: bool = ...,
10041018
iterator: bool = ...,
@@ -1055,6 +1069,7 @@ def read_table(
10551069
infer_datetime_format: bool | lib.NoDefault = ...,
10561070
keep_date_col: bool = ...,
10571071
date_parser=...,
1072+
date_format: str | None = ...,
10581073
dayfirst: bool = ...,
10591074
cache_dates: bool = ...,
10601075
iterator: Literal[False] = ...,
@@ -1111,6 +1126,7 @@ def read_table(
11111126
infer_datetime_format: bool | lib.NoDefault = ...,
11121127
keep_date_col: bool = ...,
11131128
date_parser=...,
1129+
date_format: str | None = ...,
11141130
dayfirst: bool = ...,
11151131
cache_dates: bool = ...,
11161132
iterator: bool = ...,
@@ -1179,6 +1195,7 @@ def read_table(
11791195
infer_datetime_format: bool | lib.NoDefault = lib.no_default,
11801196
keep_date_col: bool = False,
11811197
date_parser=None,
1198+
date_format: str | None = None,
11821199
dayfirst: bool = False,
11831200
cache_dates: bool = True,
11841201
# Iteration
@@ -1207,6 +1224,17 @@ def read_table(
12071224
storage_options: StorageOptions = None,
12081225
use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
12091226
) -> DataFrame | TextFileReader:
1227+
if date_parser is not None:
1228+
warnings.warn(
1229+
"The argument 'date_parser' is deprecated and will "
1230+
"be removed in a future version. "
1231+
"Please use 'date_format' instead, or read your data in as 'object' dtype "
1232+
"and then call 'to_datetime'.",
1233+
FutureWarning,
1234+
stacklevel=find_stack_level(),
1235+
)
1236+
if date_parser is not None and date_format is not None:
1237+
raise TypeError("Cannot use both 'date_parser' and 'date_format'")
12101238
# locals() should never be modified
12111239
kwds = locals().copy()
12121240
del kwds["filepath_or_buffer"]
@@ -1762,6 +1790,11 @@ def TextParser(*args, **kwds) -> TextFileReader:
17621790
parse_dates : bool, default False
17631791
keep_date_col : bool, default False
17641792
date_parser : function, optional
1793+
1794+
.. deprecated:: 2.0.0
1795+
date_format : str, default ``None``
1796+
1797+
.. versionadded:: 2.0.0
17651798
skiprows : list of integers
17661799
Row numbers to skip
17671800
skipfooter : int

‎pandas/tests/io/parser/test_parse_dates.py

Lines changed: 85 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ def __custom_date_parser(time):
6363
41051.00 -98573.7302 871458.0640 389.0086
6464
"""
6565
)
66-
result = all_parsers.read_csv(
66+
result = all_parsers.read_csv_check_warnings(
67+
FutureWarning,
68+
"Please use 'date_format' instead",
6769
testdata,
6870
delim_whitespace=True,
6971
parse_dates=True,
@@ -101,7 +103,9 @@ def __custom_date_parser(time):
101103
41051.00 -97.72
102104
"""
103105
)
104-
result = all_parsers.read_csv(
106+
result = all_parsers.read_csv_check_warnings(
107+
FutureWarning,
108+
"Please use 'date_format' instead",
105109
testdata,
106110
delim_whitespace=True,
107111
parse_dates=False,
@@ -176,7 +180,12 @@ def date_parser(*date_cols):
176180
"keep_date_col": keep_date_col,
177181
"names": ["X0", "X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8"],
178182
}
179-
result = parser.read_csv(StringIO(data), **kwds)
183+
result = parser.read_csv_check_warnings(
184+
FutureWarning,
185+
"use 'date_format' instead",
186+
StringIO(data),
187+
**kwds,
188+
)
180189

181190
expected = DataFrame(
182191
[
@@ -482,7 +491,9 @@ def test_multiple_date_cols_int_cast(all_parsers):
482491
"parse_dates": parse_dates,
483492
"date_parser": pd.to_datetime,
484493
}
485-
result = parser.read_csv(StringIO(data), **kwds)
494+
result = parser.read_csv_check_warnings(
495+
FutureWarning, "use 'date_format' instead", StringIO(data), **kwds
496+
)
486497

487498
expected = DataFrame(
488499
[
@@ -529,8 +540,13 @@ def test_multiple_date_col_timestamp_parse(all_parsers):
529540
data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25
530541
05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25"""
531542

532-
result = parser.read_csv(
533-
StringIO(data), parse_dates=[[0, 1]], header=None, date_parser=Timestamp
543+
result = parser.read_csv_check_warnings(
544+
FutureWarning,
545+
"use 'date_format' instead",
546+
StringIO(data),
547+
parse_dates=[[0, 1]],
548+
header=None,
549+
date_parser=Timestamp,
534550
)
535551
expected = DataFrame(
536552
[
@@ -686,7 +702,9 @@ def test_date_parser_int_bug(all_parsers):
686702
"12345,1,-1,3,invoice_InvoiceResource,search\n"
687703
)
688704

689-
result = parser.read_csv(
705+
result = parser.read_csv_check_warnings(
706+
FutureWarning,
707+
"use 'date_format' instead",
690708
StringIO(data),
691709
index_col=0,
692710
parse_dates=[0],
@@ -752,8 +770,11 @@ def test_csv_custom_parser(all_parsers):
752770
20090103,c,4,5
753771
"""
754772
parser = all_parsers
755-
result = parser.read_csv(
756-
StringIO(data), date_parser=lambda x: datetime.strptime(x, "%Y%m%d")
773+
result = parser.read_csv_check_warnings(
774+
FutureWarning,
775+
"use 'date_format' instead",
776+
StringIO(data),
777+
date_parser=lambda x: datetime.strptime(x, "%Y%m%d"),
757778
)
758779
expected = parser.read_csv(StringIO(data), parse_dates=True)
759780
tm.assert_frame_equal(result, expected)
@@ -903,7 +924,9 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs):
903924
02/02/2010,1,2
904925
"""
905926
if "dayfirst" in kwargs:
906-
df = parser.read_csv(
927+
df = parser.read_csv_check_warnings(
928+
FutureWarning,
929+
"use 'date_format' instead",
907930
StringIO(data),
908931
names=["time", "Q", "NTU"],
909932
date_parser=lambda d: du_parse(d, **kwargs),
@@ -925,7 +948,9 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs):
925948
else:
926949
msg = "got an unexpected keyword argument 'day_first'"
927950
with pytest.raises(TypeError, match=msg):
928-
parser.read_csv(
951+
parser.read_csv_check_warnings(
952+
FutureWarning,
953+
"use 'date_format' instead",
929954
StringIO(data),
930955
names=["time", "Q", "NTU"],
931956
date_parser=lambda d: du_parse(d, **kwargs),
@@ -1295,14 +1320,34 @@ def test_parse_dates_infer_datetime_format_warning(all_parsers):
12951320
parser = all_parsers
12961321
data = "Date,test\n2012-01-01,1\n,2"
12971322
parser.read_csv_check_warnings(
1298-
UserWarning,
1323+
FutureWarning,
12991324
"The argument 'infer_datetime_format' is deprecated",
13001325
StringIO(data),
13011326
parse_dates=["Date"],
13021327
infer_datetime_format=True,
13031328
)
13041329

13051330

1331+
@pytest.mark.parametrize(
1332+
"reader", ["read_csv_check_warnings", "read_table_check_warnings"]
1333+
)
1334+
def test_parse_dates_date_parser_and_date_format(all_parsers, reader):
1335+
# GH ???
1336+
parser = all_parsers
1337+
data = "Date,test\n2012-01-01,1\n,2"
1338+
msg = "Cannot use both 'date_parser' and 'date_format'"
1339+
with pytest.raises(TypeError, match=msg):
1340+
getattr(parser, reader)(
1341+
FutureWarning,
1342+
"use 'date_format' instead",
1343+
StringIO(data),
1344+
parse_dates=["Date"],
1345+
date_parser=pd.to_datetime,
1346+
date_format="ISO8601",
1347+
sep=",",
1348+
)
1349+
1350+
13061351
@xfail_pyarrow
13071352
@pytest.mark.parametrize(
13081353
"data,kwargs,expected",
@@ -1353,7 +1398,9 @@ def test_parse_date_time_multi_level_column_name(all_parsers):
13531398
2001-01-06, 00:00:00, 1.0, 11.
13541399
"""
13551400
parser = all_parsers
1356-
result = parser.read_csv(
1401+
result = parser.read_csv_check_warnings(
1402+
FutureWarning,
1403+
"use 'date_format' instead",
13571404
StringIO(data),
13581405
header=[0, 1],
13591406
parse_dates={"date_time": [0, 1]},
@@ -1443,7 +1490,13 @@ def test_parse_date_time_multi_level_column_name(all_parsers):
14431490
)
14441491
def test_parse_date_time(all_parsers, data, kwargs, expected):
14451492
parser = all_parsers
1446-
result = parser.read_csv(StringIO(data), date_parser=pd.to_datetime, **kwargs)
1493+
result = parser.read_csv_check_warnings(
1494+
FutureWarning,
1495+
"use 'date_format' instead",
1496+
StringIO(data),
1497+
date_parser=pd.to_datetime,
1498+
**kwargs,
1499+
)
14471500

14481501
# Python can sometimes be flaky about how
14491502
# the aggregated columns are entered, so
@@ -1458,7 +1511,9 @@ def test_parse_date_time(all_parsers, data, kwargs, expected):
14581511
def test_parse_date_fields(all_parsers):
14591512
parser = all_parsers
14601513
data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11."
1461-
result = parser.read_csv(
1514+
result = parser.read_csv_check_warnings(
1515+
FutureWarning,
1516+
"use 'date_format' instead",
14621517
StringIO(data),
14631518
header=0,
14641519
parse_dates={"ymd": [0, 1, 2]},
@@ -1480,7 +1535,9 @@ def test_parse_date_all_fields(all_parsers):
14801535
2001,01,05,10,00,0,0.0,10.
14811536
2001,01,5,10,0,00,1.,11.
14821537
"""
1483-
result = parser.read_csv(
1538+
result = parser.read_csv_check_warnings(
1539+
FutureWarning,
1540+
"use 'date_format' instead",
14841541
StringIO(data),
14851542
header=0,
14861543
date_parser=lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S"),
@@ -1504,7 +1561,9 @@ def test_datetime_fractional_seconds(all_parsers):
15041561
2001,01,05,10,00,0.123456,0.0,10.
15051562
2001,01,5,10,0,0.500000,1.,11.
15061563
"""
1507-
result = parser.read_csv(
1564+
result = parser.read_csv_check_warnings(
1565+
FutureWarning,
1566+
"use 'date_format' instead",
15081567
StringIO(data),
15091568
header=0,
15101569
date_parser=lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S.%f"),
@@ -1528,7 +1587,9 @@ def test_generic(all_parsers):
15281587
def parse_function(yy, mm):
15291588
return [date(year=int(y), month=int(m), day=1) for y, m in zip(yy, mm)]
15301589

1531-
result = parser.read_csv(
1590+
result = parser.read_csv_check_warnings(
1591+
FutureWarning,
1592+
"use 'date_format' instead",
15321593
StringIO(data),
15331594
header=0,
15341595
parse_dates={"ym": [0, 1]},
@@ -1561,7 +1622,9 @@ def date_parser(dt, time):
15611622
arr = [datetime.combine(d, t) for d, t in zip(dt, time)]
15621623
return np.array(arr, dtype="datetime64[s]")
15631624

1564-
result = parser.read_csv(
1625+
result = parser.read_csv_check_warnings(
1626+
FutureWarning,
1627+
"use 'date_format' instead",
15651628
StringIO(data),
15661629
date_parser=date_parser,
15671630
parse_dates={"datetime": ["date", "time"]},
@@ -1997,7 +2060,9 @@ def test_replace_nans_before_parsing_dates(all_parsers):
19972060
#
19982061
2017-09-09
19992062
"""
2000-
result = parser.read_csv(
2063+
result = parser.read_csv_check_warnings(
2064+
FutureWarning,
2065+
"use 'date_format' instead",
20012066
StringIO(data),
20022067
na_values={"Test": ["#", "0"]},
20032068
parse_dates=["Test"],

0 commit comments

Comments
 (0)
Please sign in to comment.