From 0e10a9d60559040be17cb711aca5c7a7928a882c Mon Sep 17 00:00:00 2001 From: "Graham R. Jeffries" Date: Fri, 15 Jan 2016 12:07:30 -0400 Subject: [PATCH 1/5] remove read_excel kwd NotImplemented error, update documentation #11544 --- doc/source/io.rst | 15 +++++++++++++++ pandas/io/excel.py | 9 +++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index d436fa52918d3..e4658b62b922b 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2503,6 +2503,21 @@ indices to be parsed. read_excel('path_to_file.xls', 'Sheet1', parse_cols=[0, 2, 3]) + +Parsing Dates ++++++++++++++ + +Datetime-like values are automatically converted to the appropriate dtype when +reading the excel file. When there is a column of strings that have to be parsed +to a datetime, you can use the `parse_dates` keyword: + +.. code-block:: python + + read_excel('path_to_file.xls', 'Sheet1', parse_dates=['date_strings']) + +So this should *not* be used when the column in excel has already a datetime-like +type. 
+ Cell Converters +++++++++++++++ diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 6662d106ad85d..5b624f3d5a55a 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -335,13 +335,10 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None, if 'chunksize' in kwds: raise NotImplementedError("chunksize keyword of read_excel " "is not implemented") - if parse_dates: - raise NotImplementedError("parse_dates keyword of read_excel " - "is not implemented") - if date_parser is not None: - raise NotImplementedError("date_parser keyword of read_excel " - "is not implemented") + if parse_dates and not index_col: + warn("The 'parse_dates=True' keyword of read_excel was provided" + " without an 'index_col' keyword value.") import xlrd from xlrd import (xldate, XL_CELL_DATE, From 925ce1b66528ea27777078cbfcd9d013282c3bdb Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 30 Sep 2016 11:13:36 +0200 Subject: [PATCH 2/5] Update tests --- pandas/io/tests/test_excel.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index d163b05aa01d4..a8b0fce1d6d7f 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -887,17 +887,26 @@ def test_read_excel_chunksize(self): chunksize=100) def test_read_excel_parse_dates(self): - # GH 11544 - with tm.assertRaises(NotImplementedError): - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - parse_dates=True) + # GH 11544, 12051 - def test_read_excel_date_parser(self): - # GH 11544 - with tm.assertRaises(NotImplementedError): - dateparse = lambda x: pd.datetime.strptime(x, '%Y-%m-%d %H:%M:%S') - pd.read_excel(os.path.join(self.dirpath, 'test1' + self.ext), - date_parser=dateparse) + df = DataFrame({'col': [1, 2, 3], + 'date_strings': pd.date_range('2012-01-01', periods=3)}) + df2 = df.copy() + df2['date_strings'] = 
df2['date_strings'].dt.strftime('%m/%d/%Y') + + with ensure_clean(self.ext) as pth: + df2.to_excel(pth) + + res = read_excel(pth) + tm.assert_frame_equal(df2, res) + + res = read_excel(pth, parse_dates=['date_strings']) + tm.assert_frame_equal(df, res) + + dateparser = lambda x: pd.datetime.strptime(x, '%m/%d/%Y') + res = read_excel(pth, parse_dates=['date_strings'], + date_parser=dateparser) + tm.assert_frame_equal(df, res) def test_read_excel_skiprows_list(self): # GH 4903 @@ -1339,8 +1348,7 @@ def test_to_excel_multiindex(self): # round trip frame.to_excel(path, 'test1', merge_cells=self.merge_cells) reader = ExcelFile(path) - df = read_excel(reader, 'test1', index_col=[0, 1], - parse_dates=False) + df = read_excel(reader, 'test1', index_col=[0, 1]) tm.assert_frame_equal(frame, df) # GH13511 @@ -1381,8 +1389,7 @@ def test_to_excel_multiindex_cols(self): frame.to_excel(path, 'test1', merge_cells=self.merge_cells) reader = ExcelFile(path) df = read_excel(reader, 'test1', header=header, - index_col=[0, 1], - parse_dates=False) + index_col=[0, 1]) if not self.merge_cells: fm = frame.columns.format(sparsify=False, adjoin=False, names=False) From b1c7f8739b37f948bcc0b52942b6adee2cfa1db4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 30 Sep 2016 11:18:36 +0200 Subject: [PATCH 3/5] add whatsnew --- doc/source/whatsnew/v0.19.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index f4110cba68c31..276757f5e7d78 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -516,6 +516,7 @@ Other enhancements - The ``pd.read_json`` and ``DataFrame.to_json`` has gained support for reading and writing json lines with ``lines`` option see :ref:`Line delimited json ` (:issue:`9180`) - :func:``read_excel`` now supports the true_values and false_values keyword arguments (:issue:`13347`) - ``groupby()`` will now accept a scalar and a single-element list for specifying 
``level`` on a non-``MultiIndex`` grouper. (:issue:`13907`) +- Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`) .. _whatsnew_0190.api: From 656ec44335823fee050f2c6f9ac6608c151c492e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 30 Sep 2016 11:21:55 +0200 Subject: [PATCH 4/5] Fix detection to raise warning --- pandas/io/excel.py | 2 +- pandas/io/tests/test_excel.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 5b624f3d5a55a..ac1d0fce4b51a 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -336,7 +336,7 @@ def _parse_excel(self, sheetname=0, header=0, skiprows=None, names=None, raise NotImplementedError("chunksize keyword of read_excel " "is not implemented") - if parse_dates and not index_col: + if parse_dates is True and not index_col: warn("The 'parse_dates=True' keyword of read_excel was provided" " without an 'index_col' keyword value.") diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index a8b0fce1d6d7f..611b1abe57d31 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -889,8 +889,9 @@ def test_read_excel_chunksize(self): def test_read_excel_parse_dates(self): # GH 11544, 12051 - df = DataFrame({'col': [1, 2, 3], - 'date_strings': pd.date_range('2012-01-01', periods=3)}) + df = DataFrame( + {'col': [1, 2, 3], + 'date_strings': pd.date_range('2012-01-01', periods=3)}) df2 = df.copy() df2['date_strings'] = df2['date_strings'].dt.strftime('%m/%d/%Y') From 0b65a7ab927efc7da8d5e9955e2ecb789098614b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 30 Sep 2016 12:54:12 +0200 Subject: [PATCH 5/5] update wording --- doc/source/io.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index e4658b62b922b..11a699b6a183d 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -2507,16 
+2507,15 @@ indices to be parsed. Parsing Dates +++++++++++++ -Datetime-like values are automatically converted to the appropriate dtype when -reading the excel file. When there is a column of strings that have to be parsed -to a datetime, you can use the `parse_dates` keyword: +Datetime-like values are normally converted automatically to the appropriate +dtype when reading the Excel file. But if you have a column of strings that +*look* like dates (but are not actually formatted as dates in Excel), you can +use the ``parse_dates`` keyword to parse those strings to datetimes: .. code-block:: python read_excel('path_to_file.xls', 'Sheet1', parse_dates=['date_strings']) -So this should *not* be used when the column in excel has already a datetime-like -type. Cell Converters +++++++++++++++