Skip to content

BUG: Validate the skipfooter parameter in read_csv #15945

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 8, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1184,6 +1184,7 @@ I/O
- Bug in ``pd.read_csv()`` in which certain invalid file objects caused the Python interpreter to crash (:issue:`15337`)
- Bug in ``pd.read_csv()`` in which invalid values for ``nrows`` and ``chunksize`` were allowed (:issue:`15767`)
- Bug in ``pd.read_csv()`` for the Python engine in which unhelpful error messages were being raised when parsing errors occurred (:issue:`15910`)
- Bug in ``pd.read_csv()`` in which the ``skipfooter`` parameter was not being properly validated (:issue:`15925`)
- Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`)
- Bug in ``.to_json()`` where ``lines=True`` and contents (keys or values) contain escaped characters (:issue:`15096`)
- Bug in ``.to_json()`` causing single byte ascii characters to be expanded to four byte unicode (:issue:`15344`)
Expand Down
36 changes: 32 additions & 4 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1036,6 +1036,37 @@ def _evaluate_usecols(usecols, names):
return usecols


def _validate_skipfooter_arg(skipfooter):
"""
Validate the 'skipfooter' parameter.

Checks whether 'skipfooter' is a non-negative integer.
Raises a ValueError if that is not the case.

Parameters
----------
skipfooter : non-negative integer
The number of rows to skip at the end of the file.

Returns
-------
validated_skipfooter : non-negative integer
The original input if the validation succeeds.

Raises
------
ValueError : 'skipfooter' was not a non-negative integer.
"""

if not is_integer(skipfooter):
raise ValueError("skipfooter must be an integer")

if skipfooter < 0:
raise ValueError("skipfooter cannot be negative")

return skipfooter


def _validate_usecols_arg(usecols):
"""
Validate the 'usecols' parameter.
Expand Down Expand Up @@ -1880,7 +1911,7 @@ def __init__(self, f, **kwds):
else:
self.skipfunc = lambda x: x in self.skiprows

self.skipfooter = kwds['skipfooter']
self.skipfooter = _validate_skipfooter_arg(kwds['skipfooter'])
self.delimiter = kwds['delimiter']

self.quotechar = kwds['quotechar']
Expand Down Expand Up @@ -2684,9 +2715,6 @@ def _get_index_name(self, columns):
return index_name, orig_names, columns

def _rows_to_cols(self, content):
if self.skipfooter < 0:
raise ValueError('skip footer cannot be negative')

col_len = self.num_original_columns

if self._implicit_index:
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ def test_iterator(self):
if self.engine == 'python':
# test bad parameter (skipfooter)
reader = self.read_csv(StringIO(self.data1), index_col=0,
iterator=True, skipfooter=True)
iterator=True, skipfooter=1)
self.assertRaises(ValueError, reader.read, 3)

def test_pass_names_with_index(self):
Expand Down
28 changes: 15 additions & 13 deletions pandas/tests/io/parser/python_parser_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,22 @@

class PythonParserTests(object):

def test_negative_skipfooter_raises(self):
text = """#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
#foo,a,b,c
1/1/2000,1.,2.,3.
1/2/2000,4,5,6
1/3/2000,7,8,9
"""
def test_invalid_skipfooter(self):
text = "a\n1\n2"

# see gh-15925 (comment)
msg = "skipfooter must be an integer"
with tm.assertRaisesRegexp(ValueError, msg):
self.read_csv(StringIO(text), skipfooter="foo")

with tm.assertRaisesRegexp(ValueError, msg):
self.read_csv(StringIO(text), skipfooter=1.5)

with tm.assertRaisesRegexp(ValueError, msg):
self.read_csv(StringIO(text), skipfooter=True)

with tm.assertRaisesRegexp(
ValueError, 'skip footer cannot be negative'):
msg = "skipfooter cannot be negative"
with tm.assertRaisesRegexp(ValueError, msg):
self.read_csv(StringIO(text), skipfooter=-1)

def test_sniff_delimiter(self):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/parser/test_unsupported.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ def test_deprecated_args(self):
'as_recarray': True,
'buffer_lines': True,
'compact_ints': True,
'skip_footer': True,
'use_unsigned': True,
'skip_footer': 1,
}

engines = 'c', 'python'
Expand Down