BUG: add names parameter to read_excel

OXPHOS · jreback · commit 6c692aee6d41 · 2016-04-15T20:43:17.000-05:00
closes #12870 closes #12895
diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
@@ -320,3 +320,4 @@ Bug Fixes
 - Bug in ``.describe()`` resets categorical columns information (:issue:`11558`)
 - Bug where ``loffset`` argument was not applied when calling ``resample().count()`` on a timeseries (:issue:`12725`)
 - ``pd.read_excel()`` now accepts path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path, in line with other ``read_*`` functions (:issue:`12655`)
+- ``pd.read_excel()`` now accepts column names associated with keyword argument ``names`` (:issue:`12870`)
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
@@ -170,7 +170,7 @@ def read_excel(io, sheetname=0, header=0, skiprows=None, skip_footer=0,
         io = ExcelFile(io, engine=engine)
 
     return io._parse_excel(
-        sheetname=sheetname, header=header, skiprows=skiprows,
+        sheetname=sheetname, header=header, skiprows=skiprows, names=names,
         index_col=index_col, parse_cols=parse_cols, parse_dates=parse_dates,
         date_parser=date_parser, na_values=na_values, thousands=thousands,
         convert_float=convert_float, has_index_names=has_index_names,
@@ -230,7 +230,7 @@ def __init__(self, io, **kwds):
                              ' buffer or path for io.')
 
     def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
-              index_col=None, parse_cols=None, parse_dates=False,
+              names=None, index_col=None, parse_cols=None, parse_dates=False,
               date_parser=None, na_values=None, thousands=None,
               convert_float=True, has_index_names=None,
               converters=None, squeeze=False, **kwds):
@@ -242,7 +242,7 @@ def parse(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
         """
 
         return self._parse_excel(sheetname=sheetname, header=header,
-                                 skiprows=skiprows,
+                                 skiprows=skiprows, names=names,
                                  index_col=index_col,
                                  has_index_names=has_index_names,
                                  parse_cols=parse_cols,
@@ -288,10 +288,10 @@ def _excel2num(x):
         else:
             return i in parse_cols
 
-    def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0,
-                     index_col=None, has_index_names=None, parse_cols=None,
-                     parse_dates=False, date_parser=None, na_values=None,
-                     thousands=None, convert_float=True,
+    def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None,
+                     skip_footer=0, index_col=None, has_index_names=None,
+                     parse_cols=None, parse_dates=False, date_parser=None,
+                     na_values=None, thousands=None, convert_float=True,
                      verbose=False, squeeze=False, **kwds):
 
         skipfooter = kwds.pop('skipfooter', None)
@@ -465,6 +465,8 @@ def _parse_cell(cell_contents, cell_typ):
                                     **kwds)
 
                 output[asheetname] = parser.read()
+                if names is not None:
+                    output[asheetname].columns = names
                 if not squeeze or isinstance(output[asheetname], DataFrame):
                     output[asheetname].columns = output[
                         asheetname].columns.set_names(header_names)
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
@@ -458,6 +458,29 @@ def test_read_one_empty_col_with_header(self):
         expected_header_zero = DataFrame(columns=[0], dtype='int64')
         tm.assert_frame_equal(actual_header_zero, expected_header_zero)
 
+    def test_set_column_names_in_parameter(self):
+        # GH 12870: column names passed through the ``names`` keyword
+        # argument must be applied to the resulting frame
+        refdf = pd.DataFrame([[1, 'foo'], [2, 'bar'],
+                              [3, 'baz']], columns=['a', 'b'])
+
+        with ensure_clean(self.ext) as pth:
+            with ExcelWriter(pth) as writer:
+                refdf.to_excel(writer, 'Data_no_head',
+                               header=False, index=False)
+                refdf.to_excel(writer, 'Data_with_head', index=False)
+
+            refdf.columns = ['A', 'B']
+
+            with ExcelFile(pth) as reader:
+                xlsdf_no_head = read_excel(reader, 'Data_no_head',
+                                           header=None, names=['A', 'B'])
+                xlsdf_with_head = read_excel(reader, 'Data_with_head',
+                                             index_col=None, names=['A', 'B'])
+
+            tm.assert_frame_equal(xlsdf_no_head, refdf)
+            tm.assert_frame_equal(xlsdf_with_head, refdf)
+
 
 class XlrdTests(ReadingTestsBase):
     """