diff --git a/doc/source/io.rst b/doc/source/io.rst index d436fa52918d3..11a699b6a183d 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2503,6 +2503,20 @@ indices to be parsed. read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3]) + +Parsing Dates ++++++++++++++ + +Datetime-like values are normally automatically converted to the appropriate +dtype when reading the excel file. But if you have a column of strings that +*look* like dates (but are not actually formatted as dates in excel), you can +use the `parse_dates` keyword to parse those strings to datetimes: + +.. code-block:: python + + read_excel('path_to_file.xls', 'Sheet1', parse_dates=['date_strings']) + + Cell Converters +++++++++++++++ diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index f4110cba68c31..276757f5e7d78 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -516,6 +516,7 @@ Other enhancements - The ``pd.read_json`` and ``DataFrame.to_json`` has gained support for reading and writing json lines with ``lines`` option see :ref:`Line delimited json ` (:issue:`9180`) - :func:``read_excel`` now supports the true_values and false_values keyword arguments (:issue:`13347`) - ``groupby()`` will now accept a scalar and a single-element list for specifying ``level`` on a non-``MultiIndex`` grouper. (:issue:`13907`) +- Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) .. _whatsnew_0190.api: diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 6662d106ad85d..ac1d0fce4b51a 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -335,13 +335,10 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None, if 'chunksize' in kwds: raise NotImplementedError("chunksize keyword of read_excel " "is not implemented") - if parse_dates: - raise NotImplementedError("parse_dates keyword of read_excel " - "is not implemented") - if date_parser is not None: - raise NotImplementedError("date_parser keyword of read_excel " - "is not implemented") + if parse_dates is True and not index_col: + warn("The 'parse_dates=True' keyword of read_excel was provided" + " without an 'index_col' keyword value.") import xlrd from xlrd import (xldate, XL_CELL_DATE, diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index d163b05aa01d4..611b1abe57d31 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -887,17 +887,27 @@ def test_read_excel_chunksize(self): chunksize=100) def test_read_excel_parse_dates(self): - # GH 11544 - with tm.assertRaises(NotImplementedError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - parse_dates=True) + # GH 11544, 12051 - def test_read_excel_date_parser(self): - # GH 11544 - with tm.assertRaises(NotImplementedError): - dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S') - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - date_parser=dateparse) + df = DataFrame( + {'col': [1, 2, 3], + 'date_strings': pd.date_range('2012-01-01', periods=3)}) + df2 = df.copy() + df2['date_strings'] = df2['date_strings'].dt.strftime('%m/%d/%Y') + + with ensure_clean(self.ext) as pth: + df2.to_excel(pth) + + res = read_excel(pth) + tm.assert_frame_equal(df2, res) + + res = read_excel(pth, parse_dates=['date_strings']) + tm.assert_frame_equal(df, res) + + dateparser = lambda x: pd.datetime.strptime(x, '%m/%d/%Y') + res = read_excel(pth, parse_dates=['date_strings'], + date_parser=dateparser) + tm.assert_frame_equal(df, res) def test_read_excel_skiprows_list(self): # GH 4903 @@ -1339,8 +1349,7 @@ def test_to_excel_multiindex(self): # round trip frame.to_excel(path, 'test1', merge_cells=self.merge_cells) reader = ExcelFile(path) - df = read_excel(reader, 'test1', index_col=[0, 1], - parse_dates=False) + df = read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(frame, df) # GH13511 @@ -1381,8 +1390,7 @@ def test_to_excel_multiindex_cols(self): frame.to_excel(path, 'test1', merge_cells=self.merge_cells) reader = ExcelFile(path) df = read_excel(reader, 'test1', header=header, - index_col=[0, 1], - parse_dates=False) + index_col=[0, 1]) if not self.merge_cells: fm = frame.columns.format(sparsify=False, adjoin=False, names=False)