pandas-dev · WillAyd · Mar 17, 2020 · Feb 28, 2020 · Feb 28, 2020 · Feb 29, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -296,6 +296,7 @@ I/O
   ``coerce_timestamps``; following pyarrow's default allows writing nanosecond
   timestamps with ``version="2.0"`` (:issue:`31652`).
 - Bug in :class:`HDFStore` that caused it to set to ``int64`` the dtype of a ``datetime64`` column when reading a DataFrame in Python 3 from fixed format written in Python 2 (:issue:`31750`)
+- :func:`read_csv` will raise a ``ValueError`` when the column names passed in ``parse_dates`` are missing in the :class:`DataFrame` (:issue:`31251`)
 
 Plotting
 ^^^^^^^^

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -6,10 +6,11 @@
 import csv
 import datetime
 from io import BufferedIOBase, RawIOBase, StringIO, TextIOWrapper
+from itertools import chain
 import re
 import sys
 from textwrap import fill
-from typing import Any, Dict, Set
+from typing import Any, Dict, List, Set
 import warnings
 
 import numpy as np
@@ -1419,6 +1420,56 @@ def __init__(self, kwds):
         # keep references to file handles opened by the parser itself
         self.handles = []
 
+    def _validate_parse_dates_presence(self, columns: List[str]) -> None:
+        """
+        Check that the columns named in parse_dates are present in columns.
+
+        Only references given by name (str) are checked, not those given by
+        index; nothing is checked unless parse_dates is a list or a dict.
+
+        Parameters
+        ----------
+        columns : list
+            List of column names of the dataframe.
+
+        Raises
+        ------
+        ValueError
+            If a column named in parse_dates is not in columns.
+
+        """
+        if isinstance(self.parse_dates, list):
+            # parse_dates may take any of the following shapes:
+            # ColReference = Union[int, str]
+            # DateGroups = List[ColReference]
+            # ParseDates = Union[ DateGroups, List[DateGroups],
+            #     Dict[ColReference, DateGroups]]
+            cols_needed = []
+            for col in self.parse_dates:
+                if isinstance(col, list):
+                    cols_needed.extend(col)
+                else:
+                    cols_needed.append(col)
+        elif isinstance(self.parse_dates, dict):
+            cols_needed = list(chain(*self.parse_dates.values()))
+        else:
+            cols_needed = []
+
+        # keep only references made by name (str); non-str ones are skipped
+        missing_cols = ", ".join(
+            sorted(
+                {
+                    col
+                    for col in cols_needed
+                    if isinstance(col, str) and col not in columns
+                }
+            )
+        )
+        if missing_cols:
+            raise ValueError(
+                f"Missing column provided to 'parse_dates': '{missing_cols}'"
+            )
+
     def close(self):
         for f in self.handles:
             f.close()
@@ -1938,6 +1989,7 @@ def __init__(self, src, **kwds):
             if len(self.names) < len(usecols):
                 _validate_usecols_names(usecols, self.names)
 
+        self._validate_parse_dates_presence(self.names)
         self._set_noconvert_columns()
 
         self.orig_names = self.names
@@ -2308,6 +2360,7 @@ def __init__(self, f, **kwds):
             if self.index_names is None:
                 self.index_names = index_names
 
+        self._validate_parse_dates_presence(self.columns)
         if self.parse_dates:
             self._no_thousands_columns = self._set_no_thousands_columns()
         else:

diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
@@ -1516,3 +1516,33 @@ def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_dateti
 
     assert except_out_dateutil == except_in_dateutil
     assert result == expected
+
+
+@pytest.mark.parametrize(
+    "names, usecols, parse_dates, missing_cols",
+    [
+        (None, ["val"], ["date", "time"], "date, time"),
+        (None, ["val"], [0, "time"], "time"),
+        (None, ["val"], [["date", "time"]], "date, time"),
+        (None, ["val"], [[0, "time"]], "time"),
+        (None, ["val"], {"date": [0, "time"]}, "time"),
+        (None, ["val"], {"date": ["date", "time"]}, "date, time"),
+        (None, ["val"], [["date", "time"], "date"], "date, time"),
+        (["date1", "time1", "temperature"], None, ["date", "time"], "date, time"),
+        (
+            ["date1", "time1", "temperature"],
+            ["date1", "temperature"],
+            ["date1", "time"],
+            "time",
+        ),
+    ],
+)
+def test_missing_column(all_parsers, names, usecols, parse_dates, missing_cols):
+    """GH31251: read_csv raises ValueError when parse_dates names a missing column."""
+    parser = all_parsers
+    content = StringIO("date,time,val\n2020-01-31,04:20:32,32\n")
+    msg = f"Missing column provided to 'parse_dates': '{missing_cols}'"
+    with pytest.raises(ValueError, match=msg):
+        parser.read_csv(
+            content, sep=",", names=names, usecols=usecols, parse_dates=parse_dates,
+        )