Merge pull request #11182 from chris-b1/header-bool-readers

jreback · jreback · commit 38ee8c701af9 · 2015-09-24T07:32:00.000-04:00
API: raise on header=bool in parsers
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -907,6 +907,23 @@ Changes to ``Categorical.unique``
    cat
    cat.unique()
 
+Changes to ``bool`` passed as ``header`` in Parsers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In earlier versions of pandas, if a bool was passed to the ``header`` argument of
+``read_csv``, ``read_excel``, or ``read_html``, it was implicitly converted to
+an integer, resulting in ``header=0`` for ``False`` and ``header=1`` for ``True``
+(:issue:`6113`).
+
+A ``bool`` input to ``header`` will now raise a ``TypeError``:
+
+.. code-block :: python
+
+   In [29]: df = pd.read_csv('data.csv', header=False)
+   TypeError: Passing a bool to header is invalid. Use header=None for no header or
+   header=int or list-like of ints to specify the row(s) making up the column names
+
+
 .. _whatsnew_0170.api_breaking.other:
 
 Other API Changes
diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -194,6 +194,12 @@ def _expand_user(filepath_or_buffer):
         return os.path.expanduser(filepath_or_buffer)
     return filepath_or_buffer
 
+def _validate_header_arg(header):
+    if isinstance(header, bool):
+        raise TypeError("Passing a bool to header is invalid. "
+                        "Use header=None for no header or "
+                        "header=int or list-like of ints to specify "
+                        "the row(s) making up the column names")
 
 def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                            compression=None):
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -11,7 +11,7 @@
 
 from pandas.core.frame import DataFrame
 from pandas.io.parsers import TextParser
-from pandas.io.common import _is_url, _urlopen
+from pandas.io.common import _is_url, _urlopen, _validate_header_arg
 from pandas.tseries.period import Period
 from pandas import json
 from pandas.compat import (map, zip, reduce, range, lrange, u, add_metaclass,
@@ -217,6 +217,7 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
         if skipfooter is not None:
             skip_footer = skipfooter
 
+        _validate_header_arg(header)
         if has_index_names is not None:
             warn("\nThe has_index_names argument is deprecated; index names "
                  "will be automatically inferred based on index_col.\n"
diff --git a/pandas/io/html.py b/pandas/io/html.py
@@ -13,7 +13,7 @@
 
 import numpy as np
 
-from pandas.io.common import _is_url, urlopen, parse_url
+from pandas.io.common import _is_url, urlopen, parse_url, _validate_header_arg
 from pandas.io.parsers import TextParser
 from pandas.compat import (lrange, lmap, u, string_types, iteritems,
                            raise_with_traceback, binary_type)
@@ -861,5 +861,6 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
     if isinstance(skiprows, numbers.Integral) and skiprows < 0:
         raise ValueError('cannot skip rows starting from the end of the '
                          'data (you passed a negative value)')
+    _validate_header_arg(header)
     return _parse(flavor, io, match, header, index_col, skiprows,
                   parse_dates, tupleize_cols, thousands, attrs, encoding)
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -17,7 +17,7 @@
 from pandas.core.common import AbstractMethodError
 from pandas.core.config import get_option
 from pandas.io.date_converters import generic_parser
-from pandas.io.common import get_filepath_or_buffer
+from pandas.io.common import get_filepath_or_buffer, _validate_header_arg
 from pandas.tseries import tools
 
 from pandas.util.decorators import Appender
@@ -673,6 +673,8 @@ def _clean_options(self, options, engine):
         # really delete this one
         keep_default_na = result.pop('keep_default_na')
 
+        _validate_header_arg(options['header'])
+
         if index_col is True:
             raise ValueError("The value of index_col couldn't be 'True'")
         if _is_index_col(index_col):
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
@@ -384,6 +384,8 @@ def test_read_excel_blank_with_header(self):
         tm.assert_frame_equal(actual, expected)
 
 
+
+
 class XlrdTests(ReadingTestsBase):
     """
     This is the base class for the xlrd tests, and 3 different file formats
@@ -641,7 +643,12 @@ def test_excel_oldindex_format(self):
                                    has_index_names=False)
         tm.assert_frame_equal(actual, expected, check_names=False)
 
-
+    def test_read_excel_bool_header_arg(self):
+        #GH 6114
+        for arg in [True, False]:
+            with tm.assertRaises(TypeError):
+                pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext),
+                              header=arg)
 
 class XlsReaderTests(XlrdTests, tm.TestCase):
     ext = '.xls'
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
@@ -637,6 +637,11 @@ def test_wikipedia_states_table(self):
         result = self.read_html(data, 'Arizona', header=1)[0]
         nose.tools.assert_equal(result['sq mi'].dtype, np.dtype('float64'))
 
+    def test_bool_header_arg(self):
+        #GH 6114
+        for arg in [True, False]:
+            with tm.assertRaises(TypeError):
+                read_html(self.spam_data, header=arg)
 
 def _lang_enc(filename):
     return os.path.splitext(os.path.basename(filename))[0].split('_')
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
@@ -4117,6 +4117,22 @@ def test_single_char_leading_whitespace(self):
                                skipinitialspace=True)
         tm.assert_frame_equal(result, expected)
 
+    def test_bool_header_arg(self):
+        # GH 6114
+        data = """\
+MyColumn
+   a
+   b
+   a
+   b"""
+        for arg in [True, False]:
+            with tm.assertRaises(TypeError):
+                pd.read_csv(StringIO(data), header=arg)
+            with tm.assertRaises(TypeError):
+                pd.read_table(StringIO(data), header=arg)
+            with tm.assertRaises(TypeError):
+                pd.read_fwf(StringIO(data), header=arg)
+
 class TestMiscellaneous(tm.TestCase):
 
     # for tests that don't fit into any of the other classes, e.g. those that