Skip to content

Commit 9cd1a30

Browse files
committed
BUG: add parse_dates check in cparser (pandas-dev#31251)
1 parent cc64192 commit 9cd1a30

File tree

2 files changed

+32
-6
lines changed

2 files changed

+32
-6
lines changed

pandas/io/parsers.py

+26-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import csv
77
import datetime
88
from io import BufferedIOBase, StringIO, TextIOWrapper
9+
from itertools import chain
910
import re
1011
import sys
1112
from textwrap import fill
@@ -1419,6 +1420,26 @@ def __init__(self, kwds):
14191420
# keep references to file handles opened by the parser itself
14201421
self.handles = []
14211422

1423+
def _confirm_parse_dates_presence(self, columns):
    """
    Check that every column *named* in ``parse_dates`` is present.

    Only string entries are validated. Non-string entries (for example
    integer positions) are skipped here, and nothing is checked when
    ``self.parse_dates`` is neither a list nor a dict.

    Parameters
    ----------
    columns : list
        Column names available to the parser.

    Raises
    ------
    ValueError
        If one or more column names referenced by ``parse_dates`` are
        not in ``columns``. Each missing name is reported once, in the
        order it was first referenced.
    """
    if isinstance(self.parse_dates, list):
        specs = self.parse_dates
    elif isinstance(self.parse_dates, dict):
        specs = self.parse_dates.values()
    else:
        specs = []

    # Flatten exactly one level so that nested specs such as
    # [["date", "time"]] or {"dt": ["date", "time"]} are validated too.
    # Scalars are wrapped so a bare string is treated as one column name
    # rather than being iterated character by character.
    cols_needed = chain.from_iterable(
        spec if isinstance(spec, (list, tuple)) else [spec] for spec in specs
    )

    missing = []
    for col in cols_needed:
        # Skip names already recorded so a column referenced twice is
        # only reported once.
        if isinstance(col, str) and col not in columns and col not in missing:
            missing.append(col)

    if missing:
        missing_cols = ", ".join(missing)
        raise ValueError(
            f"Missing column provided to 'parse_dates': '{missing_cols}'"
        )
1442+
14221443
def close(self):
14231444
for f in self.handles:
14241445
f.close()
@@ -1938,6 +1959,7 @@ def __init__(self, src, **kwds):
19381959
if len(self.names) < len(usecols):
19391960
_validate_usecols_names(usecols, self.names)
19401961

1962+
self._confirm_parse_dates_presence(self.names)
19411963
self._set_noconvert_columns()
19421964

19431965
self.orig_names = self.names
@@ -2308,6 +2330,7 @@ def __init__(self, f, **kwds):
23082330
if self.index_names is None:
23092331
self.index_names = index_names
23102332

2333+
self._confirm_parse_dates_presence(self.columns)
23112334
if self.parse_dates:
23122335
self._no_thousands_columns = self._set_no_thousands_columns()
23132336
else:
@@ -3279,7 +3302,9 @@ def _isindex(colspec):
32793302
if isinstance(colspec, int) and colspec not in data_dict:
32803303
colspec = orig_names[colspec]
32813304
elif colspec not in orig_names:
3282-
raise ValueError(f"'{colspec}' is not in list")
3305+
raise ValueError(
3306+
f"Missing column provided to 'parse_dates': '{colspec}'"
3307+
)
32833308
if _isindex(colspec):
32843309
continue
32853310
data_dict[colspec] = converter(data_dict[colspec])

pandas/tests/io/parser/test_parse_dates.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -1518,12 +1518,13 @@ def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_dateti
15181518
assert result == expected
15191519

15201520

1521-
def test_missing_column(all_parsers):
1521+
@pytest.mark.parametrize("parse_dates", [["time", ], {"date": ["time", ]}])
1522+
def test_missing_column(all_parsers, parse_dates):
1523+
"""GH31251 column names provided in parse_dates could be missing."""
15221524
parser = all_parsers
1523-
content = StringIO("time,val\n" "212.23, 32\n")
1524-
date_cols = ["time"]
1525-
msg = "'time' is not in list"
1525+
content = StringIO("time,val\n2020-01-31,32\n")
1526+
msg = "Missing column provided to 'parse_dates': 'time'"
15261527
with pytest.raises(ValueError, match=msg):
15271528
parser.read_csv(
1528-
content, sep=",", usecols=["val"], parse_dates=date_cols,
1529+
content, sep=",", usecols=["val", ], parse_dates=parse_dates,
15291530
)

0 commit comments

Comments
 (0)