Skip to content

Commit b8f22ad

Browse files
DEPR: Deprecate pandas/io/date_converters.py (#35741)
1 parent ab5b38d commit b8f22ad

File tree

5 files changed

+158
-66
lines changed

5 files changed

+158
-66
lines changed

doc/source/user_guide/io.rst

+1-14
Original file line numberDiff line numberDiff line change
@@ -930,7 +930,7 @@ take full advantage of the flexibility of the date parsing API:
930930
.. ipython:: python
931931
932932
df = pd.read_csv('tmp.csv', header=None, parse_dates=date_spec,
933-
date_parser=pd.io.date_converters.parse_date_time)
933+
date_parser=pd.to_datetime)
934934
df
935935
936936
Pandas will try to call the ``date_parser`` function in three different ways. If
@@ -942,11 +942,6 @@ an exception is raised, the next one is tried:
942942
2. If #1 fails, ``date_parser`` is called with all the columns
943943
concatenated row-wise into a single array (e.g., ``date_parser(['2013 1', '2013 2'])``).
944944

945-
3. If #2 fails, ``date_parser`` is called once for every row with one or more
946-
string arguments from the columns indicated with `parse_dates`
947-
(e.g., ``date_parser('2013', '1')`` for the first row, ``date_parser('2013', '2')``
948-
for the second, etc.).
949-
950945
Note that performance-wise, you should try these methods of parsing dates in order:
951946

952947
1. Try to infer the format using ``infer_datetime_format=True`` (see section below).
@@ -958,14 +953,6 @@ Note that performance-wise, you should try these methods of parsing dates in ord
958953
For optimal performance, this should be vectorized, i.e., it should accept arrays
959954
as arguments.
960955

961-
You can explore the date parsing functionality in
962-
`date_converters.py <https://github.com/pandas-dev/pandas/blob/master/pandas/io/date_converters.py>`__
963-
and add your own. We would love to turn this module into a community supported
964-
set of date/time parsers. To get you started, ``date_converters.py`` contains
965-
functions to parse dual date and time columns, year/month/day columns,
966-
and year/month/day/hour/minute/second columns. It also contains a
967-
``generic_parser`` function so you can curry it with a function that deals with
968-
a single date rather than the entire array.
969956

970957
.. ipython:: python
971958
:suppress:

doc/source/whatsnew/v1.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ Deprecations
195195
~~~~~~~~~~~~
196196
- Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`)
197197
- Deprecated parameter ``dtype`` of method :meth:`~Index.copy` on all index classes. Use the :meth:`~Index.astype` method instead for changing dtype (:issue:`35853`)
198-
-
198+
- Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`)
199199

200200
.. ---------------------------------------------------------------------------
201201

pandas/io/date_converters.py

+62
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,71 @@
11
"""This module is designed for community supported date conversion functions"""
2+
import warnings
3+
24
import numpy as np
35

46
from pandas._libs.tslibs import parsing
57

68

79
def parse_date_time(date_col, time_col):
10+
"""
11+
Parse columns with dates and times into a single datetime column.
12+
13+
.. deprecated:: 1.2
14+
"""
15+
warnings.warn(
16+
"""
17+
Use pd.to_datetime(date_col + " " + time_col) instead to get a Pandas Series.
18+
Use pd.to_datetime(date_col + " " + time_col).to_pydatetime() instead to get a Numpy array.
19+
""", # noqa: E501
20+
FutureWarning,
21+
stacklevel=2,
22+
)
823
date_col = _maybe_cast(date_col)
924
time_col = _maybe_cast(time_col)
1025
return parsing.try_parse_date_and_time(date_col, time_col)
1126

1227

1328
def parse_date_fields(year_col, month_col, day_col):
29+
"""
30+
Parse columns with years, months and days into a single date column.
31+
32+
.. deprecated:: 1.2
33+
"""
34+
warnings.warn(
35+
"""
36+
Use pd.to_datetime({"year": year_col, "month": month_col, "day": day_col}) instead to get a Pandas Series.
37+
Use ser = pd.to_datetime({"year": year_col, "month": month_col, "day": day_col}) and
38+
np.array([s.to_pydatetime() for s in ser]) instead to get a Numpy array.
39+
""", # noqa: E501
40+
FutureWarning,
41+
stacklevel=2,
42+
)
43+
1444
year_col = _maybe_cast(year_col)
1545
month_col = _maybe_cast(month_col)
1646
day_col = _maybe_cast(day_col)
1747
return parsing.try_parse_year_month_day(year_col, month_col, day_col)
1848

1949

2050
def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, second_col):
51+
"""
52+
Parse columns with datetime information into a single datetime column.
53+
54+
.. deprecated:: 1.2
55+
"""
56+
57+
warnings.warn(
58+
"""
59+
Use pd.to_datetime({"year": year_col, "month": month_col, "day": day_col,
60+
"hour": hour_col, "minute": minute_col, "second": second_col}) instead to get a Pandas Series.
61+
Use ser = pd.to_datetime({"year": year_col, "month": month_col, "day": day_col,
62+
"hour": hour_col, "minute": minute_col, "second": second_col}) and
63+
np.array([s.to_pydatetime() for s in ser]) instead to get a Numpy array.
64+
""", # noqa: E501
65+
FutureWarning,
66+
stacklevel=2,
67+
)
68+
2169
year_col = _maybe_cast(year_col)
2270
month_col = _maybe_cast(month_col)
2371
day_col = _maybe_cast(day_col)
@@ -30,6 +78,20 @@ def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, second_
3078

3179

3280
def generic_parser(parse_func, *cols):
81+
"""
82+
Use dateparser to parse columns with date information into a single datetime column.
83+
84+
.. deprecated:: 1.2
85+
"""
86+
87+
warnings.warn(
88+
"""
89+
Use pd.to_datetime instead.
90+
""",
91+
FutureWarning,
92+
stacklevel=2,
93+
)
94+
3395
N = _check_columns(cols)
3496
results = np.empty(N, dtype=object)
3597

pandas/tests/io/parser/test_parse_dates.py

+85-45
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,11 @@ def test_date_col_as_index_col(all_parsers):
370370
tm.assert_frame_equal(result, expected)
371371

372372

373-
def test_multiple_date_cols_int_cast(all_parsers):
373+
@pytest.mark.parametrize(
374+
"date_parser, warning",
375+
([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
376+
)
377+
def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning):
374378
data = (
375379
"KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
376380
"KORD,19990127, 20:00:00, 19:56:00, 0.0100\n"
@@ -382,13 +386,15 @@ def test_multiple_date_cols_int_cast(all_parsers):
382386
parse_dates = {"actual": [1, 2], "nominal": [1, 3]}
383387
parser = all_parsers
384388

385-
result = parser.read_csv(
386-
StringIO(data),
387-
header=None,
388-
date_parser=conv.parse_date_time,
389-
parse_dates=parse_dates,
390-
prefix="X",
391-
)
389+
with tm.assert_produces_warning(warning, check_stacklevel=False):
390+
result = parser.read_csv(
391+
StringIO(data),
392+
header=None,
393+
date_parser=date_parser,
394+
parse_dates=parse_dates,
395+
prefix="X",
396+
)
397+
392398
expected = DataFrame(
393399
[
394400
[datetime(1999, 1, 27, 19, 0), datetime(1999, 1, 27, 18, 56), "KORD", 0.81],
@@ -808,7 +814,9 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs):
808814
tm.assert_frame_equal(df, expected)
809815
else:
810816
msg = "got an unexpected keyword argument 'day_first'"
811-
with pytest.raises(TypeError, match=msg):
817+
with pytest.raises(TypeError, match=msg), tm.assert_produces_warning(
818+
FutureWarning
819+
):
812820
parser.read_csv(
813821
StringIO(data),
814822
names=["time", "Q", "NTU"],
@@ -1166,20 +1174,25 @@ def test_parse_dates_no_convert_thousands(all_parsers, data, kwargs, expected):
11661174
tm.assert_frame_equal(result, expected)
11671175

11681176

1169-
def test_parse_date_time_multi_level_column_name(all_parsers):
1177+
@pytest.mark.parametrize(
1178+
"date_parser, warning",
1179+
([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
1180+
)
1181+
def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warning):
11701182
data = """\
11711183
D,T,A,B
11721184
date, time,a,b
11731185
2001-01-05, 09:00:00, 0.0, 10.
11741186
2001-01-06, 00:00:00, 1.0, 11.
11751187
"""
11761188
parser = all_parsers
1177-
result = parser.read_csv(
1178-
StringIO(data),
1179-
header=[0, 1],
1180-
parse_dates={"date_time": [0, 1]},
1181-
date_parser=conv.parse_date_time,
1182-
)
1189+
with tm.assert_produces_warning(warning, check_stacklevel=False):
1190+
result = parser.read_csv(
1191+
StringIO(data),
1192+
header=[0, 1],
1193+
parse_dates={"date_time": [0, 1]},
1194+
date_parser=date_parser,
1195+
)
11831196

11841197
expected_data = [
11851198
[datetime(2001, 1, 5, 9, 0, 0), 0.0, 10.0],
@@ -1189,6 +1202,10 @@ def test_parse_date_time_multi_level_column_name(all_parsers):
11891202
tm.assert_frame_equal(result, expected)
11901203

11911204

1205+
@pytest.mark.parametrize(
1206+
"date_parser, warning",
1207+
([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
1208+
)
11921209
@pytest.mark.parametrize(
11931210
"data,kwargs,expected",
11941211
[
@@ -1261,9 +1278,10 @@ def test_parse_date_time_multi_level_column_name(all_parsers):
12611278
),
12621279
],
12631280
)
1264-
def test_parse_date_time(all_parsers, data, kwargs, expected):
1281+
def test_parse_date_time(all_parsers, data, kwargs, expected, date_parser, warning):
12651282
parser = all_parsers
1266-
result = parser.read_csv(StringIO(data), date_parser=conv.parse_date_time, **kwargs)
1283+
with tm.assert_produces_warning(warning, check_stacklevel=False):
1284+
result = parser.read_csv(StringIO(data), date_parser=date_parser, **kwargs)
12671285

12681286
# Python can sometimes be flaky about how
12691287
# the aggregated columns are entered, so
@@ -1272,15 +1290,20 @@ def test_parse_date_time(all_parsers, data, kwargs, expected):
12721290
tm.assert_frame_equal(result, expected)
12731291

12741292

1275-
def test_parse_date_fields(all_parsers):
1293+
@pytest.mark.parametrize(
1294+
"date_parser, warning",
1295+
([conv.parse_date_fields, FutureWarning], [pd.to_datetime, None]),
1296+
)
1297+
def test_parse_date_fields(all_parsers, date_parser, warning):
12761298
parser = all_parsers
12771299
data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11."
1278-
result = parser.read_csv(
1279-
StringIO(data),
1280-
header=0,
1281-
parse_dates={"ymd": [0, 1, 2]},
1282-
date_parser=conv.parse_date_fields,
1283-
)
1300+
with tm.assert_produces_warning(warning, check_stacklevel=False):
1301+
result = parser.read_csv(
1302+
StringIO(data),
1303+
header=0,
1304+
parse_dates={"ymd": [0, 1, 2]},
1305+
date_parser=date_parser,
1306+
)
12841307

12851308
expected = DataFrame(
12861309
[[datetime(2001, 1, 10), 10.0], [datetime(2001, 2, 1), 11.0]],
@@ -1289,19 +1312,27 @@ def test_parse_date_fields(all_parsers):
12891312
tm.assert_frame_equal(result, expected)
12901313

12911314

1292-
def test_parse_date_all_fields(all_parsers):
1315+
@pytest.mark.parametrize(
1316+
"date_parser, warning",
1317+
(
1318+
[conv.parse_all_fields, FutureWarning],
1319+
[lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S"), None],
1320+
),
1321+
)
1322+
def test_parse_date_all_fields(all_parsers, date_parser, warning):
12931323
parser = all_parsers
12941324
data = """\
12951325
year,month,day,hour,minute,second,a,b
12961326
2001,01,05,10,00,0,0.0,10.
12971327
2001,01,5,10,0,00,1.,11.
12981328
"""
1299-
result = parser.read_csv(
1300-
StringIO(data),
1301-
header=0,
1302-
date_parser=conv.parse_all_fields,
1303-
parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]},
1304-
)
1329+
with tm.assert_produces_warning(warning, check_stacklevel=False):
1330+
result = parser.read_csv(
1331+
StringIO(data),
1332+
header=0,
1333+
date_parser=date_parser,
1334+
parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]},
1335+
)
13051336
expected = DataFrame(
13061337
[
13071338
[datetime(2001, 1, 5, 10, 0, 0), 0.0, 10.0],
@@ -1312,19 +1343,27 @@ def test_parse_date_all_fields(all_parsers):
13121343
tm.assert_frame_equal(result, expected)
13131344

13141345

1315-
def test_datetime_fractional_seconds(all_parsers):
1346+
@pytest.mark.parametrize(
1347+
"date_parser, warning",
1348+
(
1349+
[conv.parse_all_fields, FutureWarning],
1350+
[lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S.%f"), None],
1351+
),
1352+
)
1353+
def test_datetime_fractional_seconds(all_parsers, date_parser, warning):
13161354
parser = all_parsers
13171355
data = """\
13181356
year,month,day,hour,minute,second,a,b
13191357
2001,01,05,10,00,0.123456,0.0,10.
13201358
2001,01,5,10,0,0.500000,1.,11.
13211359
"""
1322-
result = parser.read_csv(
1323-
StringIO(data),
1324-
header=0,
1325-
date_parser=conv.parse_all_fields,
1326-
parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]},
1327-
)
1360+
with tm.assert_produces_warning(warning, check_stacklevel=False):
1361+
result = parser.read_csv(
1362+
StringIO(data),
1363+
header=0,
1364+
date_parser=date_parser,
1365+
parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]},
1366+
)
13281367
expected = DataFrame(
13291368
[
13301369
[datetime(2001, 1, 5, 10, 0, 0, microsecond=123456), 0.0, 10.0],
@@ -1339,12 +1378,13 @@ def test_generic(all_parsers):
13391378
parser = all_parsers
13401379
data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11."
13411380

1342-
result = parser.read_csv(
1343-
StringIO(data),
1344-
header=0,
1345-
parse_dates={"ym": [0, 1]},
1346-
date_parser=lambda y, m: date(year=int(y), month=int(m), day=1),
1347-
)
1381+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
1382+
result = parser.read_csv(
1383+
StringIO(data),
1384+
header=0,
1385+
parse_dates={"ym": [0, 1]},
1386+
date_parser=lambda y, m: date(year=int(y), month=int(m), day=1),
1387+
)
13481388
expected = DataFrame(
13491389
[[date(2001, 1, 1), 10, 10.0], [date(2001, 2, 1), 1, 11.0]],
13501390
columns=["ym", "day", "a"],

pandas/tests/io/test_date_converters.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,23 @@
88

99

1010
def test_parse_date_time():
11+
1112
dates = np.array(["2007/1/3", "2008/2/4"], dtype=object)
1213
times = np.array(["05:07:09", "06:08:00"], dtype=object)
1314
expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)])
14-
15-
result = conv.parse_date_time(dates, times)
15+
with tm.assert_produces_warning(FutureWarning):
16+
result = conv.parse_date_time(dates, times)
1617
tm.assert_numpy_array_equal(result, expected)
1718

1819

1920
def test_parse_date_fields():
2021
days = np.array([3, 4])
2122
months = np.array([1, 2])
2223
years = np.array([2007, 2008])
23-
result = conv.parse_date_fields(years, months, days)
24-
2524
expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)])
25+
26+
with tm.assert_produces_warning(FutureWarning):
27+
result = conv.parse_date_fields(years, months, days)
2628
tm.assert_numpy_array_equal(result, expected)
2729

2830

@@ -34,7 +36,8 @@ def test_parse_all_fields():
3436
days = np.array([3, 4])
3537
years = np.array([2007, 2008])
3638
months = np.array([1, 2])
37-
38-
result = conv.parse_all_fields(years, months, days, hours, minutes, seconds)
3939
expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)])
40+
41+
with tm.assert_produces_warning(FutureWarning):
42+
result = conv.parse_all_fields(years, months, days, hours, minutes, seconds)
4043
tm.assert_numpy_array_equal(result, expected)

0 commit comments

Comments
 (0)