From 180923377ccb31b7469149602ef86e95b2afea8b Mon Sep 17 00:00:00 2001 From: Ryan Joyce Date: Sun, 23 Jun 2019 01:30:57 -0400 Subject: [PATCH 01/10] BUG: XlsxWriter ignoring formats on numpy types if merged cells The write_cells method takes a 'cells' parameter that is itself derived from a generator of the various cells in the dataframe. It calls _value_with_format to convert any numpy types to Python types for the Excel writer. In the section of code that deals with writing merged cells, the original 'cell.val' parameter was being passed into the writer, rather than the 'val' returned from the format function. This caused incompatible numpy or Pandas formats to get passed into the writer when they were in a 'merged' (grouped) DataFrame cell. In my case I had a Period object in the DataFrame index. All that needed to be done was simply removing 'cell.' --- pandas/io/excel/_xlsxwriter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py index 2dc736f81f6f8..2ddfcf3de5a8f 100644 --- a/pandas/io/excel/_xlsxwriter.py +++ b/pandas/io/excel/_xlsxwriter.py @@ -210,7 +210,7 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, startcol + cell.col, startrow + cell.mergestart, startcol + cell.mergeend, - cell.val, style) + val, style) else: wks.write(startrow + cell.row, startcol + cell.col, From 4d5eab12012186d2724326f7d2387089c45c0cf5 Mon Sep 17 00:00:00 2001 From: Ryan Joyce Date: Thu, 27 Jun 2019 01:24:40 -0400 Subject: [PATCH 02/10] TST: added merged cell custom object xlsxwriter test --- pandas/tests/io/excel/test_xlsxwriter.py | 29 ++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py index 391a1085161f0..fd2dd9ecf418c 100644 --- a/pandas/tests/io/excel/test_xlsxwriter.py +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -5,6 +5,10 @@ from pandas import DataFrame from 
pandas.util.testing import ensure_clean +from pandas.core.indexes.multi import MultiIndex +from pandas.core.indexes.period import Period, PeriodIndex +import numpy as np + from pandas.io.excel import ExcelWriter xlsxwriter = pytest.importorskip("xlsxwriter") @@ -63,3 +67,28 @@ def test_write_append_mode_raises(ext): with ensure_clean(ext) as f: with pytest.raises(ValueError, match=msg): ExcelWriter(f, engine='xlsxwriter', mode='a') + +def test_merged_cell_custom_objects(ext): + # Test that custom object types residing within merged (grouped) + # cells are converted to python data types before being passed to + # the xlsxwriter package. Test for issue #27006 + + #create a custom object type, and place it in a grouped dataframe + pixy = PeriodIndex(['2018', '2018', '2018', '2018', + '2019', '2019', '2019', '2019'], freq='Y') + pixq = PeriodIndex(['2018Q1', '2018Q2', '2018Q3', '2018Q4', + '2019Q1', '2019Q2', '2019Q3', '2019Q4'], freq='Q') + pixarr = [pixy, pixq] + mipix = MultiIndex.from_arrays(pixarr, names=['year', 'quarter']) + df = DataFrame(np.random.rand(2, len(mipix)), columns=mipix) + + #write the dataframe to excel + try: + with ensure_clean(ext) as path: + writer = ExcelWriter(path) + df.to_excel(writer, sheet_name='test') + passed = True + except TypeError: + passed = False + + assert passed \ No newline at end of file From 91c0bfe1c0d0155dc680f8255b893275442488a8 Mon Sep 17 00:00:00 2001 From: Ryan Joyce Date: Thu, 27 Jun 2019 01:30:55 -0400 Subject: [PATCH 03/10] CLN: fixed minor PEP8 issues --- pandas/tests/io/excel/test_xlsxwriter.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py index fd2dd9ecf418c..a9f453d540c92 100644 --- a/pandas/tests/io/excel/test_xlsxwriter.py +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -68,12 +68,14 @@ def test_write_append_mode_raises(ext): with pytest.raises(ValueError, match=msg): ExcelWriter(f, 
engine='xlsxwriter', mode='a') + +@pytest.mark.parametrize("ext", ['test.xlsx']) def test_merged_cell_custom_objects(ext): - # Test that custom object types residing within merged (grouped) + # Test that custom object types residing within merged (grouped) # cells are converted to python data types before being passed to # the xlsxwriter package. Test for issue #27006 - #create a custom object type, and place it in a grouped dataframe + # create a custom object type, and place it in a grouped dataframe pixy = PeriodIndex(['2018', '2018', '2018', '2018', '2019', '2019', '2019', '2019'], freq='Y') pixq = PeriodIndex(['2018Q1', '2018Q2', '2018Q3', '2018Q4', @@ -82,7 +84,7 @@ def test_merged_cell_custom_objects(ext): mipix = MultiIndex.from_arrays(pixarr, names=['year', 'quarter']) df = DataFrame(np.random.rand(2, len(mipix)), columns=mipix) - #write the dataframe to excel + # write the dataframe to excel try: with ensure_clean(ext) as path: writer = ExcelWriter(path) @@ -91,4 +93,4 @@ def test_merged_cell_custom_objects(ext): except TypeError: passed = False - assert passed \ No newline at end of file + assert passed From a73332c9c54c0cd0c4737d4d3280c3e1ef02d4fc Mon Sep 17 00:00:00 2001 From: Ryan Joyce Date: Thu, 27 Jun 2019 08:22:39 -0400 Subject: [PATCH 04/10] fixed erroneous decorator --- pandas/tests/io/excel/test_xlsxwriter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py index a9f453d540c92..bce8997e5a1b1 100644 --- a/pandas/tests/io/excel/test_xlsxwriter.py +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -69,7 +69,6 @@ def test_write_append_mode_raises(ext): ExcelWriter(f, engine='xlsxwriter', mode='a') -@pytest.mark.parametrize("ext", ['test.xlsx']) def test_merged_cell_custom_objects(ext): # Test that custom object types residing within merged (grouped) # cells are converted to python data types before being passed to From b096deac87f3d2ffa052ef1f023e2bbd94c80aef Mon Sep 
17 00:00:00 2001 From: Ryan Joyce Date: Thu, 27 Jun 2019 12:37:33 -0400 Subject: [PATCH 05/10] moved test to test_writers.py & reduced complexity --- pandas/tests/io/excel/test_writers.py | 14 +++++++++++++ pandas/tests/io/excel/test_xlsxwriter.py | 26 ------------------------ 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 961d781764b67..d97fba0eb59ce 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1162,6 +1162,20 @@ def test_path_local_path(self, engine, ext): path="foo.{ext}".format(ext=ext)) tm.assert_frame_equal(result, df) + def test_merged_cell_custom_objects(self, engine, ext): + # see GH-27006 + mipix = MultiIndex.from_tuples([(pd.Period('2018'), pd.Period('2018Q1')), + (pd.Period('2018'), pd.Period('2018Q2'))]) + expected = DataFrame(np.random.rand(2, len(mipix)), columns=mipix) + expected.to_excel(self.path) + result = pd.read_excel(self.path, header=[0, 1], index_col=0) + + # need to convert PeriodIndexes to standard Indexes for assert comparison + expected.columns.set_levels([[str(i) for i in mipix.levels[0]], + [str(i) for i in mipix.levels[1]]], + level=[0, 1], + inplace=True) + tm.assert_frame_equal(expected, result) class TestExcelWriterEngineTests: diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py index bce8997e5a1b1..18e5fbcb7ba0e 100644 --- a/pandas/tests/io/excel/test_xlsxwriter.py +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -67,29 +67,3 @@ def test_write_append_mode_raises(ext): with ensure_clean(ext) as f: with pytest.raises(ValueError, match=msg): ExcelWriter(f, engine='xlsxwriter', mode='a') - - -def test_merged_cell_custom_objects(ext): - # Test that custom object types residing within merged (grouped) - # cells are converted to python data types before being passed to - # the xlsxwriter package. 
Test for issue #27006 - - # create a custom object type, and place it in a grouped dataframe - pixy = PeriodIndex(['2018', '2018', '2018', '2018', - '2019', '2019', '2019', '2019'], freq='Y') - pixq = PeriodIndex(['2018Q1', '2018Q2', '2018Q3', '2018Q4', - '2019Q1', '2019Q2', '2019Q3', '2019Q4'], freq='Q') - pixarr = [pixy, pixq] - mipix = MultiIndex.from_arrays(pixarr, names=['year', 'quarter']) - df = DataFrame(np.random.rand(2, len(mipix)), columns=mipix) - - # write the dataframe to excel - try: - with ensure_clean(ext) as path: - writer = ExcelWriter(path) - df.to_excel(writer, sheet_name='test') - passed = True - except TypeError: - passed = False - - assert passed From 225a8da543525247ee0ebfeff13dc68d94231676 Mon Sep 17 00:00:00 2001 From: Ryan Joyce Date: Thu, 27 Jun 2019 13:42:24 -0400 Subject: [PATCH 06/10] minor pep8 fixes --- pandas/tests/io/excel/test_writers.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index d97fba0eb59ce..f426b56bfc97d 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1164,15 +1164,14 @@ def test_path_local_path(self, engine, ext): def test_merged_cell_custom_objects(self, engine, ext): # see GH-27006 - mipix = MultiIndex.from_tuples([(pd.Period('2018'), pd.Period('2018Q1')), - (pd.Period('2018'), pd.Period('2018Q2'))]) - expected = DataFrame(np.random.rand(2, len(mipix)), columns=mipix) + mi = MultiIndex.from_tuples([(pd.Period('2018'), pd.Period('2018Q1')), + (pd.Period('2018'), pd.Period('2018Q2'))]) + expected = DataFrame(np.random.rand(2, len(mi)), columns=mi) expected.to_excel(self.path) result = pd.read_excel(self.path, header=[0, 1], index_col=0) - - # need to convert PeriodIndexes to standard Indexes for assert comparison - expected.columns.set_levels([[str(i) for i in mipix.levels[0]], - [str(i) for i in mipix.levels[1]]], + # need to convert PeriodIndexes to 
standard Indexes for assert equal + expected.columns.set_levels([[str(i) for i in mi.levels[0]], + [str(i) for i in mi.levels[1]]], level=[0, 1], inplace=True) tm.assert_frame_equal(expected, result) From fd7da9218c5351c094b7a31bd1b90ead847909e9 Mon Sep 17 00:00:00 2001 From: Ryan Joyce Date: Thu, 27 Jun 2019 14:23:34 -0400 Subject: [PATCH 07/10] documented bugfix in whatsnew v0.25.0 --- doc/source/whatsnew/v0.25.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 19636f42c6129..d1d5998e9d432 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -714,6 +714,7 @@ I/O - Fixed bug in :func:`pandas.read_csv` where a BOM would result in incorrect parsing using engine='python' (:issue:`26545`) - :func:`read_excel` now raises a ``ValueError`` when input is of type :class:`pandas.io.excel.ExcelFile` and ``engine`` param is passed since :class:`pandas.io.excel.ExcelFile` has an engine defined (:issue:`26566`) - Bug while selecting from :class:`HDFStore` with ``where=''`` specified (:issue:`26610`). +- Fixed bug in :func:`DataFrame.to_excel()` where custom objects (e.g. 
`PeriodIndex`) inside merged cells were not being converted into types safe for the Excel writer (:issue:`27006`) Plotting ^^^^^^^^ From 8087f07783f0e7d5303a9fa74d670cd07b1e9f51 Mon Sep 17 00:00:00 2001 From: Ryan Joyce Date: Thu, 27 Jun 2019 14:54:36 -0400 Subject: [PATCH 08/10] removed unused dependencies --- pandas/tests/io/excel/test_xlsxwriter.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py index 18e5fbcb7ba0e..391a1085161f0 100644 --- a/pandas/tests/io/excel/test_xlsxwriter.py +++ b/pandas/tests/io/excel/test_xlsxwriter.py @@ -5,10 +5,6 @@ from pandas import DataFrame from pandas.util.testing import ensure_clean -from pandas.core.indexes.multi import MultiIndex -from pandas.core.indexes.period import Period, PeriodIndex -import numpy as np - from pandas.io.excel import ExcelWriter xlsxwriter = pytest.importorskip("xlsxwriter") From b16397db22d910b1ce53eacc7aa027ff9dd0b94c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 27 Jun 2019 18:17:26 -0500 Subject: [PATCH 09/10] lint --- pandas/tests/io/excel/test_writers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index e793dfe4246c1..e21dd09b66ffb 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1176,6 +1176,7 @@ def test_merged_cell_custom_objects(self, engine, ext): inplace=True) tm.assert_frame_equal(expected, result) + class TestExcelWriterEngineTests: @pytest.mark.parametrize('klass,ext', [ From 92936418c2b38aad88d4f9f55c387a4efc45f51d Mon Sep 17 00:00:00 2001 From: Ryan Joyce Date: Thu, 27 Jun 2019 20:43:36 -0400 Subject: [PATCH 10/10] remove random, add merge_cells fixture, etc --- pandas/tests/io/excel/test_writers.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 
e21dd09b66ffb..a4fdcdf70a3ea 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1162,18 +1162,20 @@ def test_path_local_path(self, engine, ext): path="foo.{ext}".format(ext=ext)) tm.assert_frame_equal(result, df) - def test_merged_cell_custom_objects(self, engine, ext): + def test_merged_cell_custom_objects(self, engine, merge_cells, ext): # see GH-27006 mi = MultiIndex.from_tuples([(pd.Period('2018'), pd.Period('2018Q1')), (pd.Period('2018'), pd.Period('2018Q2'))]) - expected = DataFrame(np.random.rand(2, len(mi)), columns=mi) + expected = DataFrame(np.ones((2, 2)), columns=mi) expected.to_excel(self.path) - result = pd.read_excel(self.path, header=[0, 1], index_col=0) + result = pd.read_excel(self.path, header=[0, 1], + index_col=0, convert_float=False) # need to convert PeriodIndexes to standard Indexes for assert equal expected.columns.set_levels([[str(i) for i in mi.levels[0]], [str(i) for i in mi.levels[1]]], level=[0, 1], inplace=True) + expected.index = expected.index.astype(np.float64) tm.assert_frame_equal(expected, result)