diff --git a/doc/source/io.rst b/doc/source/io.rst index 041daaeb3b12f..d2e97ad6b9e84 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2192,6 +2192,18 @@ indices to be parsed. read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3]) + +Parsing Dates ++++++++++++++ + +The ``parse_dates`` keyword for ``read_excel`` is used to specify whether to parse strings +as datetimes. + +.. code-block:: python + + read_excel('path_to_file.xls', 'Sheet1', parse_dates=['strings']) + + Cell Converters +++++++++++++++ diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 3496e9eea834c..eb644b11254bd 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -481,7 +481,7 @@ Bug Fixes - Bug in ``df.replace`` while replacing value in mixed dtype ``Dataframe`` (:issue:`11698`) - Bug in ``Index`` prevents copying name of passed ``Index``, when a new name is not provided (:issue:`11193`) - Bug in ``read_excel`` failing to read any non-empty sheets when empty sheets exist and ``sheetname=None`` (:issue:`11711`) -- Bug in ``read_excel`` failing to raise ``NotImplemented`` error when keywords ``parse_dates`` and ``date_parser`` are provided (:issue:`11544`) +- Bug in ``read_excel`` failing to raise a warning when keyword ``parse_dates`` is provided without keyword ``index_col`` (:issue:`11544`) - Bug in ``read_sql`` with pymysql connections failing to return chunked data (:issue:`11522`) - Bug in ``.to_csv`` ignoring formatting parameters ``decimal``, ``na_rep``, ``float_format`` for float indexes (:issue:`11553`) - Bug in ``Int64Index`` and ``Float64Index`` preventing the use of the modulo operator (:issue:`9244`) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 106d263f56093..3388e4a250b7a 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -298,13 +298,10 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, skip_footer=0, if 'chunksize' in kwds: raise NotImplementedError("chunksize 
keyword of read_excel " "is not implemented") - if parse_dates: - raise NotImplementedError("parse_dates keyword of read_excel " - "is not implemented") - if date_parser is not None: - raise NotImplementedError("date_parser keyword of read_excel " - "is not implemented") + if parse_dates and not index_col: + warn("The parse_dates keyword of read_excel was provided without " + "an index_col keyword value.") import xlrd from xlrd import (xldate, XL_CELL_DATE, diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 8023c25cdd660..308d495375ae8 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -167,9 +167,10 @@ def test_parse_cols_int(self): dfref = self.get_csv_refdf('test1') dfref = dfref.reindex(columns=['A', 'B', 'C']) - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_cols=3) - df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, + df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True, parse_cols=3) + df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, + parse_dates=True, parse_cols=3) # TODO add index to xls file) tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) @@ -178,9 +179,10 @@ def test_parse_cols_list(self): dfref = self.get_csv_refdf('test1') dfref = dfref.reindex(columns=['B', 'C']) - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, + df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True, parse_cols=[0, 2, 3]) df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, + parse_dates=True, parse_cols=[0, 2, 3]) # TODO add index to xls file) tm.assert_frame_equal(df1, dfref, check_names=False) @@ -191,28 +193,28 @@ def test_parse_cols_str(self): dfref = self.get_csv_refdf('test1') df1 = dfref.reindex(columns=['A', 'B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, + df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, 
parse_dates=True, parse_cols='A:D') df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_cols='A:D') + parse_dates=True, parse_cols='A:D') # TODO add index to xls, read xls ignores index name ? tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, + df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True, parse_cols='A,C,D') df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_cols='A,C,D') + parse_dates=True, parse_cols='A,C,D') # TODO add index to xls file tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) df1 = dfref.reindex(columns=['B', 'C']) - df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, + df2 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True, parse_cols='A,C:D') df3 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, - parse_cols='A,C:D') + parse_dates=True, parse_cols='A,C:D') tm.assert_frame_equal(df2, df1, check_names=False) tm.assert_frame_equal(df3, df1, check_names=False) @@ -249,23 +251,23 @@ def test_excel_table_sheet_by_index(self): excel = self.get_excelfile('test1') dfref = self.get_csv_refdf('test1') - df1 = read_excel(excel, 0, index_col=0) - df2 = read_excel(excel, 1, skiprows=[1], index_col=0) + df1 = read_excel(excel, 0, index_col=0, parse_dates=True) + df2 = read_excel(excel, 1, skiprows=[1], index_col=0, parse_dates=True) tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) - df1 = excel.parse(0, index_col=0) - df2 = excel.parse(1, skiprows=[1], index_col=0) + df1 = excel.parse(0, index_col=0, parse_dates=True) + df2 = excel.parse(1, skiprows=[1], index_col=0, parse_dates=True) tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) - df3 = 
read_excel(excel, 0, index_col=0, skipfooter=1) - df4 = read_excel(excel, 0, index_col=0, skip_footer=1) + df3 = read_excel(excel, 0, index_col=0, parse_dates=True, skipfooter=1) + df4 = read_excel(excel, 0, index_col=0, parse_dates=True, skip_footer=1) tm.assert_frame_equal(df3, df1.ix[:-1]) tm.assert_frame_equal(df3, df4) - df3 = excel.parse(0, index_col=0, skipfooter=1) - df4 = excel.parse(0, index_col=0, skip_footer=1) + df3 = excel.parse(0, index_col=0, parse_dates=True, skipfooter=1) + df4 = excel.parse(0, index_col=0, parse_dates=True, skip_footer=1) tm.assert_frame_equal(df3, df1.ix[:-1]) tm.assert_frame_equal(df3, df4) @@ -277,15 +279,16 @@ def test_excel_table(self): dfref = self.get_csv_refdf('test1') - df1 = self.get_exceldf('test1', 'Sheet1', index_col=0) - df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0) + df1 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True) + df2 = self.get_exceldf('test1', 'Sheet2', skiprows=[1], index_col=0, + parse_dates=True) # TODO add index to file tm.assert_frame_equal(df1, dfref, check_names=False) tm.assert_frame_equal(df2, dfref, check_names=False) - df3 = self.get_exceldf('test1', 'Sheet1', index_col=0, + df3 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True, skipfooter=1) - df4 = self.get_exceldf('test1', 'Sheet1', index_col=0, + df4 = self.get_exceldf('test1', 'Sheet1', index_col=0, parse_dates=True, skip_footer=1) tm.assert_frame_equal(df3, df1.ix[:-1]) tm.assert_frame_equal(df3, df4) @@ -408,14 +411,14 @@ class XlrdTests(ReadingTestsBase): def test_excel_read_buffer(self): pth = os.path.join(self.dirpath, 'test1' + self.ext) - expected = read_excel(pth, 'Sheet1', index_col=0) + expected = read_excel(pth, 'Sheet1', index_col=0, parse_dates=True) with open(pth, 'rb') as f: - actual = read_excel(f, 'Sheet1', index_col=0) + actual = read_excel(f, 'Sheet1', index_col=0, parse_dates=True) tm.assert_frame_equal(expected, actual) with open(pth, 'rb') as f: xls = 
ExcelFile(f) - actual = read_excel(xls, 'Sheet1', index_col=0) + actual = read_excel(xls, 'Sheet1', index_col=0, parse_dates=True) tm.assert_frame_equal(expected, actual) def test_read_xlrd_Book(self): @@ -677,7 +680,7 @@ def test_excel_oldindex_format(self): tm.assert_frame_equal(actual, expected, check_names=False) def test_read_excel_bool_header_arg(self): - # GH 6114 + #GH 6114 for arg in [True, False]: with tm.assertRaises(TypeError): pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), @@ -689,19 +692,6 @@ def test_read_excel_chunksize(self): pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), chunksize=100) - def test_read_excel_parse_dates(self): - # GH 11544 - with tm.assertRaises(NotImplementedError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - parse_dates=True) - - def test_read_excel_date_parser(self): - # GH 11544 - with tm.assertRaises(NotImplementedError): - dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S') - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - date_parser=dateparse) - def test_read_excel_skiprows_list(self): #GH 4903 actual = pd.read_excel(os.path.join(self.dirpath, 'testskiprows' + self.ext), @@ -1103,7 +1093,7 @@ def test_to_excel_periodindex(self): xp.to_excel(path, 'sht1') reader = ExcelFile(path) - rs = read_excel(reader, 'sht1', index_col=0) + rs = read_excel(reader, 'sht1', index_col=0, parse_dates=True) tm.assert_frame_equal(xp, rs.to_period('M')) def test_to_excel_multiindex(self):