Skip to content

Commit 5e4966c

Browse files
pandas-dev: gh-14671 — Check that usecols of type string is a subset of names; raise an error if it is not
1 parent a6fcec6 commit 5e4966c

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

pandas/io/parsers.py

+6
Original file line numberDiff line numberDiff line change
@@ -1620,6 +1620,12 @@ def __init__(self, src, **kwds):
16201620

16211621
if self.usecols:
16221622
usecols = _evaluate_usecols(self.usecols, self.orig_names)
1623+
1624+
#gh-14671
1625+
if (self.usecols_dtype == 'string') and \
1626+
(not set(usecols).issubset(self.orig_names)):
1627+
raise ValueError("Usecols do not match names.")
1628+
16231629
if len(self.names) > len(usecols):
16241630
self.names = [n for i, n in enumerate(self.names)
16251631
if (i in usecols or n in usecols)]

pandas/tests/io/parser/usecols.py

+18
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import numpy as np
1111
import pandas.util.testing as tm
12+
import re
1213

1314
from pandas import DataFrame, Index
1415
from pandas._libs.lib import Timestamp
@@ -475,3 +476,20 @@ def test_uneven_length_cols(self):
475476
'C': [3, 5, 4, 3, 3, 7]})
476477
df = self.read_csv(StringIO(data), usecols=usecols)
477478
tm.assert_frame_equal(df, expected)
479+
480+
def test_raise_on_usecols_names_mismatch(self):
    """Check string ``usecols`` against the header names (gh-14671).

    Reading succeeds when every requested column name is present in the
    data, and raises ``ValueError`` when at least one requested name is
    missing.
    """
    # see gh-14671
    data = 'a,b,c,d\n1,2,3,4\n5,6,7,8'

    # All requested names exist in the header: parsing must succeed.
    usecols = ['a', 'b', 'c', 'd']
    df = self.read_csv(StringIO(data), usecols=usecols)
    expected = DataFrame({'a': [1, 5], 'b': [2, 6],
                          'c': [3, 7], 'd': [4, 8]})
    tm.assert_frame_equal(df, expected)

    msg = 'Usecols do not match names'  # from parsers.py CParserWrapper()
    msg2 = 'is not in list'  # from parser.py _handle_usecols()

    # Accept either message. A single '|' is the regex alternation; the
    # earlier '||' created an empty alternative that matched *any* message,
    # and the surrounding literal quotes were part of the pattern.
    pattern = re.compile(msg + '|' + msg2)

    # 'f' is not a column of ``data``, whether the number of requested
    # columns equals the header length or is shorter than it.
    for usecols in (['a', 'b', 'c', 'f'], ['a', 'b', 'f']):
        with tm.assert_raises_regex(ValueError, pattern):
            self.read_csv(StringIO(data), usecols=usecols)

0 commit comments

Comments
 (0)