Skip to content

Commit 4af9e45

Browse files
brendapraggastis authored and gfyoung committed
Check for usecols mismatch with names
Closes pandas-dev gh-14671.
1 parent 7271f50 commit 4af9e45

File tree

2 files changed

+55
-0
lines changed

2 files changed

+55
-0
lines changed

pandas/io/parsers.py

+6
Original file line number | Diff line number | Diff line change
@@ -1626,6 +1626,12 @@ def __init__(self, src, **kwds):
1626 1626

1627 1627
if self.usecols:
1628 1628
usecols = _evaluate_usecols(self.usecols, self.orig_names)
1629+
1630+
# see gh-14671
1631+
if (self.usecols_dtype == 'string' and
1632+
not set(usecols).issubset(self.orig_names)):
1633+
raise ValueError("Usecols do not match names.")
1634+
1629 1635
if len(self.names) > len(usecols):
1630 1636
self.names = [n for i, n in enumerate(self.names)
1631 1637
if (i in usecols or n in usecols)]

pandas/tests/io/parser/usecols.py

+49
Original file line number | Diff line number | Diff line change
@@ -475,3 +475,52 @@ def test_uneven_length_cols(self):
475 475
'C': [3, 5, 4, 3, 3, 7]})
476 476
df = self.read_csv(StringIO(data), usecols=usecols)
477 477
tm.assert_frame_equal(df, expected)
478+
479+
def test_raise_on_usecols_names_mismatch(self):
480+
# see gh-14671
481+
msg = ('Usecols do not match names' if self.engine == 'c'
482+
else 'is not in list')
483+
data = 'a,b,c,d\n1,2,3,4\n5,6,7,8'
484+
485+
usecols = ['a', 'b', 'c', 'd']
486+
df = self.read_csv(StringIO(data), usecols=usecols)
487+
expected = DataFrame({'a': [1, 5], 'b': [2, 6],
488+
'c': [3, 7], 'd': [4, 8]})
489+
tm.assert_frame_equal(df, expected)
490+
491+
usecols = ['a', 'b', 'c', 'f']
492+
with tm.assert_raises_regex(ValueError, msg):
493+
self.read_csv(StringIO(data), usecols=usecols)
494+
495+
usecols = ['a', 'b', 'f']
496+
with tm.assert_raises_regex(ValueError, msg):
497+
self.read_csv(StringIO(data), usecols=usecols)
498+
499+
names = ['A', 'B', 'C', 'D']
500+
501+
df = self.read_csv(StringIO(data), header=0, names=names)
502+
expected = DataFrame({'A': [1, 5], 'B': [2, 6],
503+
'C': [3, 7], 'D': [4, 8]})
504+
tm.assert_frame_equal(df, expected)
505+
506+
# usecols = ['A', 'C']
507+
# df = self.read_csv(StringIO(data), header=0,
508+
# names=names, usecols=usecols)
509+
# expected = DataFrame({'A': [1, 5], 'C': [3, 7]})
510+
# tm.assert_frame_equal(df, expected)
511+
#
512+
# usecols = [0, 2]
513+
# df = self.read_csv(StringIO(data), header=0,
514+
# names=names, usecols=usecols)
515+
# expected = DataFrame({'A': [1, 5], 'C': [3, 7]})
516+
# tm.assert_frame_equal(df, expected)
517+
518+
usecols = ['A', 'B', 'C', 'f']
519+
with tm.assert_raises_regex(ValueError, msg):
520+
self.read_csv(StringIO(data), header=0,
521+
names=names, usecols=usecols)
522+
523+
usecols = ['A', 'B', 'f']
524+
with tm.assert_raises_regex(ValueError, msg):
525+
self.read_csv(StringIO(data),
526+
names=names, usecols=usecols)

0 commit comments

Comments
 (0)