Skip to content

Commit b28eb10

Browse files
gfyoung authored and jreback committed
BUG: Validate the skipfooter parameter in read_csv (#15945)
Previously, the skipfooter parameter was assumed to be an integer, but that was not checked.
1 parent 11c6f48 commit b28eb10

File tree

5 files changed

+50
-19
lines changed

5 files changed

+50
-19
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1185,6 +1185,7 @@ I/O
11851185
- Bug in ``pd.read_csv()`` in which certain invalid file objects caused the Python interpreter to crash (:issue:`15337`)
11861186
- Bug in ``pd.read_csv()`` in which invalid values for ``nrows`` and ``chunksize`` were allowed (:issue:`15767`)
11871187
- Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`)
1188+
- Bug in ``pd.read_csv()`` in which the ``skipfooter`` parameter was not being properly validated (:issue:`15925`)
11881189
- Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`)
11891190
- Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`)
11901191
- Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`)

pandas/io/parsers.py

+32-4
Original file line numberDiff line numberDiff line change
@@ -1036,6 +1036,37 @@ def _evaluate_usecols(usecols, names):
10361036
return usecols
10371037

10381038

1039+
def _validate_skipfooter_arg(skipfooter):
1040+
"""
1041+
Validate the 'skipfooter' parameter.
1042+
1043+
Checks whether 'skipfooter' is a non-negative integer.
1044+
Raises a ValueError if that is not the case.
1045+
1046+
Parameters
1047+
----------
1048+
skipfooter : non-negative integer
1049+
The number of rows to skip at the end of the file.
1050+
1051+
Returns
1052+
-------
1053+
validated_skipfooter : non-negative integer
1054+
The original input if the validation succeeds.
1055+
1056+
Raises
1057+
------
1058+
ValueError : 'skipfooter' was not a non-negative integer.
1059+
"""
1060+
1061+
if not is_integer(skipfooter):
1062+
raise ValueError("skipfooter must be an integer")
1063+
1064+
if skipfooter < 0:
1065+
raise ValueError("skipfooter cannot be negative")
1066+
1067+
return skipfooter
1068+
1069+
10391070
def _validate_usecols_arg(usecols):
10401071
"""
10411072
Validate the 'usecols' parameter.
@@ -1880,7 +1911,7 @@ def __init__(self, f, **kwds):
18801911
else:
18811912
self.skipfunc = lambda x: x in self.skiprows
18821913

1883-
self.skipfooter = kwds['skipfooter']
1914+
self.skipfooter = _validate_skipfooter_arg(kwds['skipfooter'])
18841915
self.delimiter = kwds['delimiter']
18851916

18861917
self.quotechar = kwds['quotechar']
@@ -2684,9 +2715,6 @@ def _get_index_name(self, columns):
26842715
return index_name, orig_names, columns
26852716

26862717
def _rows_to_cols(self, content):
2687-
if self.skipfooter < 0:
2688-
raise ValueError('skip footer cannot be negative')
2689-
26902718
col_len = self.num_original_columns
26912719

26922720
if self._implicit_index:

pandas/tests/io/parser/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ def test_iterator(self):
546546
if self.engine == 'python':
547547
# test bad parameter (skipfooter)
548548
reader = self.read_csv(StringIO(self.data1), index_col=0,
549-
iterator=True, skipfooter=True)
549+
iterator=True, skipfooter=1)
550550
self.assertRaises(ValueError, reader.read, 3)
551551

552552
def test_pass_names_with_index(self):

pandas/tests/io/parser/python_parser_only.py

+15-13
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,22 @@
2020

2121
class PythonParserTests(object):
2222

23-
def test_negative_skipfooter_raises(self):
24-
text = """#foo,a,b,c
25-
#foo,a,b,c
26-
#foo,a,b,c
27-
#foo,a,b,c
28-
#foo,a,b,c
29-
#foo,a,b,c
30-
1/1/2000,1.,2.,3.
31-
1/2/2000,4,5,6
32-
1/3/2000,7,8,9
33-
"""
23+
def test_invalid_skipfooter(self):
24+
text = "a\n1\n2"
25+
26+
# see gh-15925 (comment)
27+
msg = "skipfooter must be an integer"
28+
with tm.assertRaisesRegexp(ValueError, msg):
29+
self.read_csv(StringIO(text), skipfooter="foo")
30+
31+
with tm.assertRaisesRegexp(ValueError, msg):
32+
self.read_csv(StringIO(text), skipfooter=1.5)
33+
34+
with tm.assertRaisesRegexp(ValueError, msg):
35+
self.read_csv(StringIO(text), skipfooter=True)
3436

35-
with tm.assertRaisesRegexp(
36-
ValueError, 'skip footer cannot be negative'):
37+
msg = "skipfooter cannot be negative"
38+
with tm.assertRaisesRegexp(ValueError, msg):
3739
self.read_csv(StringIO(text), skipfooter=-1)
3840

3941
def test_sniff_delimiter(self):

pandas/tests/io/parser/test_unsupported.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,8 @@ def test_deprecated_args(self):
112112
'as_recarray': True,
113113
'buffer_lines': True,
114114
'compact_ints': True,
115-
'skip_footer': True,
116115
'use_unsigned': True,
116+
'skip_footer': 1,
117117
}
118118

119119
engines = 'c', 'python'

0 commit comments

Comments
 (0)