Skip to content

API: raise on header=bool in parsers #11182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 24, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,23 @@ Changes to ``Categorical.unique``
cat
cat.unique()

Changes to ``bool`` passed as ``header`` in Parsers
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In earlier versions of pandas, if a bool was passed to the ``header`` argument of
``read_csv``, ``read_excel``, or ``read_html``, it was implicitly converted to
an integer, resulting in ``header=0`` for ``False`` and ``header=1`` for ``True``
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add the issue number here

(:issue:`6113`).

A ``bool`` input to ``header`` will now raise a ``TypeError``.

.. code-block :: python

In [29]: df = pd.read_csv('data.csv', header=False)
TypeError: Passing a bool to header is invalid. Use header=None for no header or
header=int or list-like of ints to specify the row(s) making up the column names


.. _whatsnew_0170.api_breaking.other:

Other API Changes
Expand Down
6 changes: 6 additions & 0 deletions pandas/io/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,12 @@ def _expand_user(filepath_or_buffer):
return os.path.expanduser(filepath_or_buffer)
return filepath_or_buffer

def _validate_header_arg(header):
if isinstance(header, bool):
raise TypeError("Passing a bool to header is invalid. "
"Use header=None for no header or "
"header=int or list-like of ints to specify "
"the row(s) making up the column names")

def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
compression=None):
Expand Down
3 changes: 2 additions & 1 deletion pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from pandas.core.frame import DataFrame
from pandas.io.parsers import TextParser
from pandas.io.common import _is_url, _urlopen
from pandas.io.common import _is_url, _urlopen, _validate_header_arg
from pandas.tseries.period import Period
from pandas import json
from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,
Expand Down Expand Up @@ -217,6 +217,7 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
if skipfooter is not None:
skip_footer = skipfooter

_validate_header_arg(header)
if has_index_names is not None:
warn("\nThe has_index_names argument is deprecated; index names "
"will be automatically inferred based on index_col.\n"
Expand Down
3 changes: 2 additions & 1 deletion pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

import numpy as np

from pandas.io.common import _is_url, urlopen, parse_url
from pandas.io.common import _is_url, urlopen, parse_url, _validate_header_arg
from pandas.io.parsers import TextParser
from pandas.compat import (lrange, lmap, u, string_types, iteritems,
raise_with_traceback, binary_type)
Expand Down Expand Up @@ -861,5 +861,6 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
if isinstance(skiprows, numbers.Integral) and skiprows < 0:
raise ValueError('cannot skip rows starting from the end of the '
'data (you passed a negative value)')
_validate_header_arg(header)
return _parse(flavor, io, match, header, index_col, skiprows,
parse_dates, tupleize_cols, thousands, attrs, encoding)
4 changes: 3 additions & 1 deletion pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from pandas.core.common import AbstractMethodError
from pandas.core.config import get_option
from pandas.io.date_converters import generic_parser
from pandas.io.common import get_filepath_or_buffer
from pandas.io.common import get_filepath_or_buffer, _validate_header_arg
from pandas.tseries import tools

from pandas.util.decorators import Appender
Expand Down Expand Up @@ -673,6 +673,8 @@ def _clean_options(self, options, engine):
# really delete this one
keep_default_na = result.pop('keep_default_na')

_validate_header_arg(options['header'])

if index_col is True:
raise ValueError("The value of index_col couldn't be 'True'")
if _is_index_col(index_col):
Expand Down
9 changes: 8 additions & 1 deletion pandas/io/tests/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ def test_read_excel_blank_with_header(self):
tm.assert_frame_equal(actual, expected)




class XlrdTests(ReadingTestsBase):
"""
This is the base class for the xlrd tests, and 3 different file formats
Expand Down Expand Up @@ -641,7 +643,12 @@ def test_excel_oldindex_format(self):
has_index_names=False)
tm.assert_frame_equal(actual, expected, check_names=False)


def test_read_excel_bool_header_arg(self):
    # GH 6114
    # Both boolean values must be rejected with a TypeError when passed
    # as ``header`` to read_excel.
    for flag in (True, False):
        with tm.assertRaises(TypeError):
            workbook = os.path.join(self.dirpath, 'test1' + self.ext)
            pd.read_excel(workbook, header=flag)

class XlsReaderTests(XlrdTests, tm.TestCase):
ext = '.xls'
Expand Down
5 changes: 5 additions & 0 deletions pandas/io/tests/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,11 @@ def test_wikipedia_states_table(self):
result = self.read_html(data, 'Arizona', header=1)[0]
nose.tools.assert_equal(result['sq mi'].dtype, np.dtype('float64'))

def test_bool_header_arg(self):
    # GH 6114
    # read_html must raise TypeError for either boolean ``header`` value.
    for flag in (True, False):
        with tm.assertRaises(TypeError):
            read_html(self.spam_data, header=flag)

def _lang_enc(filename):
return os.path.splitext(os.path.basename(filename))[0].split('_')
Expand Down
16 changes: 16 additions & 0 deletions pandas/io/tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4117,6 +4117,22 @@ def test_single_char_leading_whitespace(self):
skipinitialspace=True)
tm.assert_frame_equal(result, expected)

def test_bool_header_arg(self):
    # GH 6114
    # Each text reader must raise TypeError for either boolean ``header``
    # value; readers are exercised in the order csv, table, fwf.
    data = """\
MyColumn
a
b
a
b"""
    for flag in (True, False):
        for reader in (pd.read_csv, pd.read_table, pd.read_fwf):
            with tm.assertRaises(TypeError):
                reader(StringIO(data), header=flag)

class TestMiscellaneous(tm.TestCase):

# for tests that don't fit into any of the other classes, e.g. those that
Expand Down