DEPR: Deprecate pandas/io/date_converters.py (#35741)

avinashpancham · web-flow · commit b8f22ad3b980 · 2020-09-12T17:37:57.000-04:00
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -930,7 +930,7 @@ take full advantage of the flexibility of the date parsing API:
 .. ipython:: python
 
    df = pd.read_csv('tmp.csv', header=None, parse_dates=date_spec,
-                    date_parser=pd.io.date_converters.parse_date_time)
+                    date_parser=pd.to_datetime)
    df
 
 Pandas will try to call the ``date_parser`` function in three different ways. If
@@ -942,11 +942,6 @@ an exception is raised, the next one is tried:
 2. If #1 fails, ``date_parser`` is called with all the columns
    concatenated row-wise into a single array (e.g., ``date_parser(['2013 1', '2013 2'])``).
 
-3. If #2 fails, ``date_parser`` is called once for every row with one or more
-   string arguments from the columns indicated with `parse_dates`
-   (e.g., ``date_parser('2013', '1')`` for the first row, ``date_parser('2013', '2')``
-   for the second, etc.).
-
 Note that performance-wise, you should try these methods of parsing dates in order:
 
 1. Try to infer the format using ``infer_datetime_format=True`` (see section below).
@@ -958,14 +953,6 @@ Note that performance-wise, you should try these methods of parsing dates in ord
    For optimal performance, this should be vectorized, i.e., it should accept arrays
    as arguments.
 
-You can explore the date parsing functionality in
-`date_converters.py <https://github.com/pandas-dev/pandas/blob/master/pandas/io/date_converters.py>`__
-and add your own. We would love to turn this module into a community supported
-set of date/time parsers. To get you started, ``date_converters.py`` contains
-functions to parse dual date and time columns, year/month/day columns,
-and year/month/day/hour/minute/second columns. It also contains a
-``generic_parser`` function so you can curry it with a function that deals with
-a single date rather than the entire array.
 
 .. ipython:: python
    :suppress:
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -195,7 +195,7 @@ Deprecations
 ~~~~~~~~~~~~
 - Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`)
 - Deprecated parameter ``dtype`` in :~meth:`Index.copy` on method all index classes. Use the :meth:`Index.astype` method instead for changing dtype(:issue:`35853`)
--
+- Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py
@@ -1,23 +1,71 @@
 """This module is designed for community supported date conversion functions"""
+import warnings
+
 import numpy as np
 
 from pandas._libs.tslibs import parsing
 
 
 def parse_date_time(date_col, time_col):
+    """
+    Parse columns with dates and times into a single datetime column.
+
+    .. deprecated:: 1.2
+    """
+    warnings.warn(
+        """
+        Use pd.to_datetime(date_col + " " + time_col) instead to get a Pandas Series.
+        Use pd.to_datetime(date_col + " " + time_col).to_pydatetime() instead to get a Numpy array.
+""",  # noqa: E501
+        FutureWarning,
+        stacklevel=2,
+    )
     date_col = _maybe_cast(date_col)
     time_col = _maybe_cast(time_col)
     return parsing.try_parse_date_and_time(date_col, time_col)
 
 
 def parse_date_fields(year_col, month_col, day_col):
+    """
+    Parse columns with years, months and days into a single date column.
+
+    .. deprecated:: 1.2
+    """
+    warnings.warn(
+        """
+        Use pd.to_datetime({"year": year_col, "month": month_col, "day": day_col}) instead to get a Pandas Series.
+        Use ser = pd.to_datetime({"year": year_col, "month": month_col, "day": day_col}) and
+        np.array([s.to_pydatetime() for s in ser]) instead to get a Numpy array.
+""",  # noqa: E501
+        FutureWarning,
+        stacklevel=2,
+    )
+
     year_col = _maybe_cast(year_col)
     month_col = _maybe_cast(month_col)
     day_col = _maybe_cast(day_col)
     return parsing.try_parse_year_month_day(year_col, month_col, day_col)
 
 
 def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, second_col):
+    """
+    Parse columns with datetime information into a single datetime column.
+
+    .. deprecated:: 1.2
+    """
+
+    warnings.warn(
+        """
+        Use pd.to_datetime({"year": year_col, "month": month_col, "day": day_col,
+        "hour": hour_col, "minute": minute_col, "second": second_col}) instead to get a Pandas Series.
+        Use ser = pd.to_datetime({"year": year_col, "month": month_col, "day": day_col,
+        "hour": hour_col, "minute": minute_col, "second": second_col}) and
+        np.array([s.to_pydatetime() for s in ser]) instead to get a Numpy array.
+""",  # noqa: E501
+        FutureWarning,
+        stacklevel=2,
+    )
+
     year_col = _maybe_cast(year_col)
     month_col = _maybe_cast(month_col)
     day_col = _maybe_cast(day_col)
@@ -30,6 +78,20 @@ def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, second_
 
 
 def generic_parser(parse_func, *cols):
+    """
+    Use dateparser to parse columns with date information into a single datetime column.
+
+    .. deprecated:: 1.2
+    """
+
+    warnings.warn(
+        """
+        Use pd.to_datetime instead.
+""",
+        FutureWarning,
+        stacklevel=2,
+    )
+
     N = _check_columns(cols)
     results = np.empty(N, dtype=object)
 
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
@@ -370,7 +370,11 @@ def test_date_col_as_index_col(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-def test_multiple_date_cols_int_cast(all_parsers):
+@pytest.mark.parametrize(
+    "date_parser, warning",
+    ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
+)
+def test_multiple_date_cols_int_cast(all_parsers, date_parser, warning):
     data = (
         "KORD,19990127, 19:00:00, 18:56:00, 0.8100\n"
         "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n"
@@ -382,13 +386,15 @@ def test_multiple_date_cols_int_cast(all_parsers):
     parse_dates = {"actual": [1, 2], "nominal": [1, 3]}
     parser = all_parsers
 
-    result = parser.read_csv(
-        StringIO(data),
-        header=None,
-        date_parser=conv.parse_date_time,
-        parse_dates=parse_dates,
-        prefix="X",
-    )
+    with tm.assert_produces_warning(warning, check_stacklevel=False):
+        result = parser.read_csv(
+            StringIO(data),
+            header=None,
+            date_parser=date_parser,
+            parse_dates=parse_dates,
+            prefix="X",
+        )
+
     expected = DataFrame(
         [
             [datetime(1999, 1, 27, 19, 0), datetime(1999, 1, 27, 18, 56), "KORD", 0.81],
@@ -808,7 +814,9 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs):
         tm.assert_frame_equal(df, expected)
     else:
         msg = "got an unexpected keyword argument 'day_first'"
-        with pytest.raises(TypeError, match=msg):
+        with pytest.raises(TypeError, match=msg), tm.assert_produces_warning(
+            FutureWarning
+        ):
             parser.read_csv(
                 StringIO(data),
                 names=["time", "Q", "NTU"],
@@ -1166,20 +1174,25 @@ def test_parse_dates_no_convert_thousands(all_parsers, data, kwargs, expected):
     tm.assert_frame_equal(result, expected)
 
 
-def test_parse_date_time_multi_level_column_name(all_parsers):
+@pytest.mark.parametrize(
+    "date_parser, warning",
+    ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
+)
+def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warning):
     data = """\
 D,T,A,B
 date, time,a,b
 2001-01-05, 09:00:00, 0.0, 10.
 2001-01-06, 00:00:00, 1.0, 11.
 """
     parser = all_parsers
-    result = parser.read_csv(
-        StringIO(data),
-        header=[0, 1],
-        parse_dates={"date_time": [0, 1]},
-        date_parser=conv.parse_date_time,
-    )
+    with tm.assert_produces_warning(warning, check_stacklevel=False):
+        result = parser.read_csv(
+            StringIO(data),
+            header=[0, 1],
+            parse_dates={"date_time": [0, 1]},
+            date_parser=date_parser,
+        )
 
     expected_data = [
         [datetime(2001, 1, 5, 9, 0, 0), 0.0, 10.0],
@@ -1189,6 +1202,10 @@ def test_parse_date_time_multi_level_column_name(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize(
+    "date_parser, warning",
+    ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]),
+)
 @pytest.mark.parametrize(
     "data,kwargs,expected",
     [
@@ -1261,9 +1278,10 @@ def test_parse_date_time_multi_level_column_name(all_parsers):
         ),
     ],
 )
-def test_parse_date_time(all_parsers, data, kwargs, expected):
+def test_parse_date_time(all_parsers, data, kwargs, expected, date_parser, warning):
     parser = all_parsers
-    result = parser.read_csv(StringIO(data), date_parser=conv.parse_date_time, **kwargs)
+    with tm.assert_produces_warning(warning, check_stacklevel=False):
+        result = parser.read_csv(StringIO(data), date_parser=date_parser, **kwargs)
 
     # Python can sometimes be flaky about how
     # the aggregated columns are entered, so
@@ -1272,15 +1290,20 @@ def test_parse_date_time(all_parsers, data, kwargs, expected):
     tm.assert_frame_equal(result, expected)
 
 
-def test_parse_date_fields(all_parsers):
+@pytest.mark.parametrize(
+    "date_parser, warning",
+    ([conv.parse_date_fields, FutureWarning], [pd.to_datetime, None]),
+)
+def test_parse_date_fields(all_parsers, date_parser, warning):
     parser = all_parsers
     data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11."
-    result = parser.read_csv(
-        StringIO(data),
-        header=0,
-        parse_dates={"ymd": [0, 1, 2]},
-        date_parser=conv.parse_date_fields,
-    )
+    with tm.assert_produces_warning(warning, check_stacklevel=False):
+        result = parser.read_csv(
+            StringIO(data),
+            header=0,
+            parse_dates={"ymd": [0, 1, 2]},
+            date_parser=date_parser,
+        )
 
     expected = DataFrame(
         [[datetime(2001, 1, 10), 10.0], [datetime(2001, 2, 1), 11.0]],
@@ -1289,19 +1312,27 @@ def test_parse_date_fields(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-def test_parse_date_all_fields(all_parsers):
+@pytest.mark.parametrize(
+    "date_parser, warning",
+    (
+        [conv.parse_all_fields, FutureWarning],
+        [lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S"), None],
+    ),
+)
+def test_parse_date_all_fields(all_parsers, date_parser, warning):
     parser = all_parsers
     data = """\
 year,month,day,hour,minute,second,a,b
 2001,01,05,10,00,0,0.0,10.
 2001,01,5,10,0,00,1.,11.
 """
-    result = parser.read_csv(
-        StringIO(data),
-        header=0,
-        date_parser=conv.parse_all_fields,
-        parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]},
-    )
+    with tm.assert_produces_warning(warning, check_stacklevel=False):
+        result = parser.read_csv(
+            StringIO(data),
+            header=0,
+            date_parser=date_parser,
+            parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]},
+        )
     expected = DataFrame(
         [
             [datetime(2001, 1, 5, 10, 0, 0), 0.0, 10.0],
@@ -1312,19 +1343,27 @@ def test_parse_date_all_fields(all_parsers):
     tm.assert_frame_equal(result, expected)
 
 
-def test_datetime_fractional_seconds(all_parsers):
+@pytest.mark.parametrize(
+    "date_parser, warning",
+    (
+        [conv.parse_all_fields, FutureWarning],
+        [lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S.%f"), None],
+    ),
+)
+def test_datetime_fractional_seconds(all_parsers, date_parser, warning):
     parser = all_parsers
     data = """\
 year,month,day,hour,minute,second,a,b
 2001,01,05,10,00,0.123456,0.0,10.
 2001,01,5,10,0,0.500000,1.,11.
 """
-    result = parser.read_csv(
-        StringIO(data),
-        header=0,
-        date_parser=conv.parse_all_fields,
-        parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]},
-    )
+    with tm.assert_produces_warning(warning, check_stacklevel=False):
+        result = parser.read_csv(
+            StringIO(data),
+            header=0,
+            date_parser=date_parser,
+            parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]},
+        )
     expected = DataFrame(
         [
             [datetime(2001, 1, 5, 10, 0, 0, microsecond=123456), 0.0, 10.0],
@@ -1339,12 +1378,13 @@ def test_generic(all_parsers):
     parser = all_parsers
     data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11."
 
-    result = parser.read_csv(
-        StringIO(data),
-        header=0,
-        parse_dates={"ym": [0, 1]},
-        date_parser=lambda y, m: date(year=int(y), month=int(m), day=1),
-    )
+    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+        result = parser.read_csv(
+            StringIO(data),
+            header=0,
+            parse_dates={"ym": [0, 1]},
+            date_parser=lambda y, m: date(year=int(y), month=int(m), day=1),
+        )
     expected = DataFrame(
         [[date(2001, 1, 1), 10, 10.0], [date(2001, 2, 1), 1, 11.0]],
         columns=["ym", "day", "a"],
diff --git a/pandas/tests/io/test_date_converters.py b/pandas/tests/io/test_date_converters.py
@@ -8,21 +8,23 @@
 
 
 def test_parse_date_time():
+
     dates = np.array(["2007/1/3", "2008/2/4"], dtype=object)
     times = np.array(["05:07:09", "06:08:00"], dtype=object)
     expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)])
-
-    result = conv.parse_date_time(dates, times)
+    with tm.assert_produces_warning(FutureWarning):
+        result = conv.parse_date_time(dates, times)
     tm.assert_numpy_array_equal(result, expected)
 
 
 def test_parse_date_fields():
     days = np.array([3, 4])
     months = np.array([1, 2])
     years = np.array([2007, 2008])
-    result = conv.parse_date_fields(years, months, days)
-
     expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)])
+
+    with tm.assert_produces_warning(FutureWarning):
+        result = conv.parse_date_fields(years, months, days)
     tm.assert_numpy_array_equal(result, expected)
 
 
@@ -34,7 +36,8 @@ def test_parse_all_fields():
     days = np.array([3, 4])
     years = np.array([2007, 2008])
     months = np.array([1, 2])
-
-    result = conv.parse_all_fields(years, months, days, hours, minutes, seconds)
     expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)])
+
+    with tm.assert_produces_warning(FutureWarning):
+        result = conv.parse_all_fields(years, months, days, hours, minutes, seconds)
     tm.assert_numpy_array_equal(result, expected)