-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Check parse_dates names in columns #31815
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
24d1657
79ca148
c988567
61034b4
46a77f6
74dad82
f7e2b74
7d1f825
08bdf31
c389222
a8817ba
0a52507
04e9a78
ccc6923
341a719
c3a9ea3
0aad719
a76cfbf
b30d7d4
8cfcf65
f04d913
980ab6b
b42505e
c5fb994
c4e8eb3
a96bdbd
d2a14ac
11f1500
bf4e74d
39e9b35
d7996b9
81669e3
625441b
4ab7bb4
6bc2dca
fcf7258
f27d70f
292a993
8dd9fab
8680ddd
66440b2
d494717
3060972
4181042
9d66896
50ebb24
ba59f6a
7509aed
4a1fbad
02887a5
1117328
1b45d90
bec7378
f1c5cb0
f49907f
1e6cf96
35174ae
415e43b
634a41f
16684f2
361a938
9767da6
2154ad3
dd03c19
012a6a3
143b011
c2f3ce3
bc6ab05
97054ac
a4d743e
ee8b856
48cb5a9
8a7fbbe
50dad9c
56cc7f4
4ac1e5f
95b0e14
e8eb49d
bcfc608
c67407d
c8f32cb
870ef1e
72bc92e
32b3d9e
67fc9e0
ff05154
5dd27ed
fa2aa9f
bfcfaae
8425c26
6d30046
00a00a1
bead1c3
8f49265
153244b
571a73b
b41911e
74823a0
e99db38
eb97073
2aa9cb9
53ece70
cc4c0b3
45d093d
206a547
92a64c7
3689e6c
2e8274b
10228cb
f163f25
3118576
a7ecced
4f0568e
05ab8ba
06eb8db
267d2d8
8444453
c81b0ba
92bb4c9
cea4059
3b4b86b
7b0887c
3da053c
ebeb407
bdc7fd0
9c06b30
f4dc9f9
aa1089f
0c107bd
3cb81ea
ac3056f
02ac975
30bb0f0
74181e1
421f654
96644d0
60b8f05
304209f
11164c3
37a7006
27e18e5
2c1c36f
a12ab06
73b2622
38e16c4
cb4f739
c05ef6f
7d37ab8
4a05601
d171c87
9353ca7
c0066f3
80d37ad
b9bcdc3
9f7cd99
494ffd6
9e69040
dafec63
8de4096
1825fa1
10d10c6
6ab00bc
628dfba
a66e149
9aa9e4f
5ed1a0b
89d5f87
25443f0
00e8e4a
80387ae
f9b49c8
020dcce
b2ebd5a
016482a
dbc446a
ab56348
41bc226
20a84a5
7017599
7b99f03
0edd2d9
034fab5
58df0ac
d4293f0
9a02c35
df49f53
aa6f241
1c142e7
7d32184
241bf60
54b4001
ee9cb75
e88629f
69f4c70
cf993fd
1bcc368
2227c83
2eca9e8
786bfd9
c5e3e25
5f9fb1d
972ab61
7c7048c
f3a9b78
a152c30
89ed095
7c5d3d5
2fc8559
9bf3a28
8b200c1
6c74f88
cf957ad
2885b30
a713063
1b8b428
6e04264
27f0000
35537dd
9535246
e39cd30
e6bd49f
d8115ce
9a8e83a
52a63ab
ed7adcd
2c060b4
8621970
97c0ce9
eb6f8d3
66c6e8e
922f932
736761f
cad3f1c
0233a55
674dd69
217a428
1d18e95
86f0468
d5d6834
b4cbc19
4800ab4
ea1d8fa
1e7dc49
5fa9860
edcf1c8
7eb6713
5cadb42
7fed1ca
999ffc5
5b381e3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
import csv | ||
import datetime | ||
from io import BufferedIOBase, RawIOBase, StringIO, TextIOWrapper | ||
from itertools import chain | ||
import re | ||
import sys | ||
from textwrap import fill | ||
|
@@ -1423,6 +1424,26 @@ def __init__(self, kwds): | |
# keep references to file handles opened by the parser itself | ||
self.handles = [] | ||
|
||
def _confirm_parse_dates_presence(self, columns) -> None:
    """
    Check that columns named in ``parse_dates`` are present in ``columns``.

    Only entries given as names (``str``) are checked; entries of any
    other type (for example integer positions) are ignored here.

    Parameters
    ----------
    columns : container of column labels
        The column names available for parsing. Only membership tests
        (``in``) are performed on it.

    Raises
    ------
    ValueError
        If one or more names listed in ``parse_dates`` are not in
        ``columns``. Each missing name is reported once, in the order
        it was first encountered.
    """
    if isinstance(self.parse_dates, dict):
        # Each dict value is itself a collection of source columns, so
        # the values are flattened into a single stream of entries.
        cols_needed = chain.from_iterable(self.parse_dates.values())
    elif isinstance(self.parse_dates, list):
        # A list may mix plain entries with nested groups such as
        # [["date", "time"]]; expand the groups so that the names inside
        # them are validated as well instead of being skipped.
        cols_needed = chain.from_iterable(
            col if isinstance(col, (list, tuple)) else [col]
            for col in self.parse_dates
        )
    else:
        # Any other value (e.g. a bool) names no columns. Per the review
        # discussion, the read_csv docs do not allow a single scalar name.
        cols_needed = []

    # dict.fromkeys de-duplicates while keeping first-seen order.
    missing = dict.fromkeys(
        col for col in cols_needed if isinstance(col, str) and col not in columns
    )
    if missing:
        missing_cols = ", ".join(missing)
        raise ValueError(
            f"Missing column provided to 'parse_dates': '{missing_cols}'"
        )
|
||
def close(self): | ||
for f in self.handles: | ||
f.close() | ||
|
@@ -1942,6 +1963,7 @@ def __init__(self, src, **kwds): | |
if len(self.names) < len(usecols): | ||
_validate_usecols_names(usecols, self.names) | ||
|
||
self._confirm_parse_dates_presence(self.names) | ||
self._set_noconvert_columns() | ||
|
||
self.orig_names = self.names | ||
|
@@ -2312,6 +2334,7 @@ def __init__(self, f, **kwds): | |
if self.index_names is None: | ||
self.index_names = index_names | ||
|
||
self._confirm_parse_dates_presence(self.columns) | ||
if self.parse_dates: | ||
self._no_thousands_columns = self._set_no_thousands_columns() | ||
else: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1516,3 +1516,15 @@ def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_dateti | |
|
||
assert except_out_dateutil == except_in_dateutil | ||
assert result == expected | ||
|
||
|
||
@pytest.mark.parametrize("parse_dates", [["time"], {"date": ["time"]}])
def test_missing_column(all_parsers, parse_dates):
    """GH31251: naming a column in parse_dates that usecols dropped must raise."""
    # "time" exists in the data but is excluded by usecols, so it is not
    # among the columns available when parse_dates is validated.
    data = StringIO("time,val\n2020-01-31,32\n")
    expected_msg = "Missing column provided to 'parse_dates': 'time'"

    with pytest.raises(ValueError, match=expected_msg):
        all_parsers.read_csv(data, sep=",", usecols=["val"], parse_dates=parse_dates)
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rename to `_validate_parse_dates`.
Please add type annotations for `columns`.