Skip to content

Commit 63dc85e

Browse files
committed
BUG: add parse_dates check in cparser (pandas-dev#31251)
1 parent 8225c8b commit 63dc85e

File tree

2 files changed

+32
-6
lines changed

2 files changed

+32
-6
lines changed

pandas/io/parsers.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import csv
77
import datetime
88
from io import BufferedIOBase, RawIOBase, StringIO, TextIOWrapper
9+
from itertools import chain
910
import re
1011
import sys
1112
from textwrap import fill
@@ -1423,6 +1424,26 @@ def __init__(self, kwds):
14231424
# keep references to file handles opened by the parser itself
14241425
self.handles = []
14251426

1427+
def _confirm_parse_dates_presence(self, columns):
    """
    Check that column names given to ``parse_dates`` are available.

    Only string (name-based) entries are validated; integer positions and
    any other non-string entries are ignored by this check.

    Parameters
    ----------
    columns : container of column names
        The column names that are available to the parser.

    Raises
    ------
    ValueError
        If one or more names requested through ``parse_dates`` are not
        found in ``columns``. All missing names are reported together,
        comma-separated, in a single message.
    """
    parse_dates = self.parse_dates
    if isinstance(parse_dates, list):
        # A list entry may itself be a list/tuple of columns that are to be
        # combined into a single date column (e.g. [["date", "time"]]), so
        # flatten one level; otherwise names inside a nested entry would
        # never be checked.
        cols_needed = chain.from_iterable(
            spec if isinstance(spec, (list, tuple)) else [spec]
            for spec in parse_dates
        )
    elif isinstance(parse_dates, dict):
        # Each dict value is the collection of source columns for one
        # resulting date column.
        cols_needed = chain.from_iterable(parse_dates.values())
    else:
        # Any other kind of parse_dates value names no columns to check.
        cols_needed = []

    missing_cols = ", ".join(
        col for col in cols_needed if isinstance(col, str) and col not in columns
    )
    if missing_cols:
        raise ValueError(
            f"Missing column provided to 'parse_dates': '{missing_cols}'"
        )
1446+
14261447
def close(self):
14271448
for f in self.handles:
14281449
f.close()
@@ -1942,6 +1963,7 @@ def __init__(self, src, **kwds):
19421963
if len(self.names) < len(usecols):
19431964
_validate_usecols_names(usecols, self.names)
19441965

1966+
self._confirm_parse_dates_presence(self.names)
19451967
self._set_noconvert_columns()
19461968

19471969
self.orig_names = self.names
@@ -2312,6 +2334,7 @@ def __init__(self, f, **kwds):
23122334
if self.index_names is None:
23132335
self.index_names = index_names
23142336

2337+
self._confirm_parse_dates_presence(self.columns)
23152338
if self.parse_dates:
23162339
self._no_thousands_columns = self._set_no_thousands_columns()
23172340
else:
@@ -3283,7 +3306,9 @@ def _isindex(colspec):
32833306
if isinstance(colspec, int) and colspec not in data_dict:
32843307
colspec = orig_names[colspec]
32853308
elif colspec not in orig_names:
3286-
raise ValueError(f"'{colspec}' is not in list")
3309+
raise ValueError(
3310+
f"Missing column provided to 'parse_dates': '{colspec}'"
3311+
)
32873312
if _isindex(colspec):
32883313
continue
32893314
data_dict[colspec] = converter(data_dict[colspec])

pandas/tests/io/parser/test_parse_dates.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -1518,12 +1518,13 @@ def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_dateti
15181518
assert result == expected
15191519

15201520

1521-
def test_missing_column(all_parsers):
1521+
@pytest.mark.parametrize("parse_dates", [["time", ], {"date": ["time", ]}])
1522+
def test_missing_column(all_parsers, parse_dates):
1523+
"""GH31251 column names provided in parse_dates could be missing."""
15221524
parser = all_parsers
1523-
content = StringIO("time,val\n" "212.23, 32\n")
1524-
date_cols = ["time"]
1525-
msg = "'time' is not in list"
1525+
content = StringIO("time,val\n2020-01-31,32\n")
1526+
msg = "Missing column provided to 'parse_dates': 'time'"
15261527
with pytest.raises(ValueError, match=msg):
15271528
parser.read_csv(
1528-
content, sep=",", usecols=["val"], parse_dates=date_cols,
1529+
content, sep=",", usecols=["val", ], parse_dates=parse_dates,
15291530
)

0 commit comments

Comments
 (0)