From 526d756d903aaf6870bb03bbc6aab5610d115548 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 8 Mar 2020 12:29:43 +0100 Subject: [PATCH 01/40] WIP: unit tests are still failing for ods write to ods read loop back test --- pandas/core/config_init.py | 10 ++ pandas/io/excel/__init__.py | 4 + pandas/io/excel/_base.py | 16 ++- pandas/io/excel/_odswriter.py | 155 ++++++++++++++++++++++++++ pandas/io/excel/_util.py | 2 +- pandas/tests/io/excel/test_writers.py | 6 +- 6 files changed, 185 insertions(+), 8 deletions(-) create mode 100644 pandas/io/excel/_odswriter.py diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 5089445c79897..54d23fe8829e6 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -553,6 +553,7 @@ def use_inf_as_na_cb(key): _xls_options = ["xlwt"] _xlsm_options = ["openpyxl"] _xlsx_options = ["openpyxl", "xlsxwriter"] +_ods_options = ["odf"] with cf.config_prefix("io.excel.xls"): @@ -581,6 +582,15 @@ def use_inf_as_na_cb(key): ) +with cf.config_prefix("io.excel.ods"): + cf.register_option( + "writer", + "auto", + writer_engine_doc.format(ext="ods", others=", ".join(_ods_options)), + validator=str, + ) + + # Set up the io.parquet specific configuration. parquet_engine_doc = """ : string diff --git a/pandas/io/excel/__init__.py b/pandas/io/excel/__init__.py index 455abaa7fb589..26d152106b47f 100644 --- a/pandas/io/excel/__init__.py +++ b/pandas/io/excel/__init__.py @@ -3,6 +3,7 @@ from pandas.io.excel._util import register_writer from pandas.io.excel._xlsxwriter import _XlsxWriter from pandas.io.excel._xlwt import _XlwtWriter +from pandas.io.excel._odswriter import _ODSWriter __all__ = ["read_excel", "ExcelWriter", "ExcelFile"] @@ -14,3 +15,6 @@ register_writer(_XlsxWriter) + + +register_writer(_ODSWriter) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 6c3b49b9afc68..85a70b7efc00d 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -533,13 +533,13 @@ class ExcelWriter(metaclass=abc.ABCMeta): """ Class for writing DataFrame objects into excel sheets. - Default is to use xlwt for xls, openpyxl for xlsx. + Default is to use xlwt for xls, openpyxl for xlsx, odf for ods. See DataFrame.to_excel for typical usage. Parameters ---------- path : str - Path to xls or xlsx file. + Path to xls or xlsx or ods file. engine : str (optional) Engine to use for writing. If None, defaults to ``io.excel..writer``. NOTE: can only be passed as a keyword @@ -789,7 +789,7 @@ class ExcelFile: Parameters ---------- - io : str, path object (pathlib.Path or py._path.local.LocalPath), + path_or_io : str, path object (pathlib.Path or py._path.local.LocalPath), a file-like object, xlrd workbook or openpypl workbook. If a string or path object, expected to be a path to a .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file. @@ -816,18 +816,22 @@ class ExcelFile: "pyxlsb": _PyxlsbReader, } - def __init__(self, io, engine=None): + def __init__(self, path_or_io, engine=None): if engine is None: engine = "xlrd" + if isinstance(path_or_io, str): + ext = os.path.splitext(path_or_io)[-1][1:] + if ext == "ods": + engine = "odf" if engine not in self._engines: raise ValueError(f"Unknown engine: {engine}") self.engine = engine # Could be a str, ExcelFile, Book, etc. - self.io = io + self.io = path_or_io # Always a string - self._io = stringify_path(io) + self._io = stringify_path(path_or_io) self._reader = self._engines[engine](self._io) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py new file mode 100644 index 0000000000000..0d8e828203e4e --- /dev/null +++ b/pandas/io/excel/_odswriter.py @@ -0,0 +1,155 @@ +from collections import defaultdict + +import pandas._libs.json as json + +from pandas.io.excel._base import ExcelWriter +from pandas.io.excel._util import _validate_freeze_panes + +from odf.opendocument import OpenDocumentSpreadsheet +from odf.table import Table, TableRow, TableCell +from odf.text import P + +class _ODSWriter(ExcelWriter): + engine = "odf" + supported_extensions = (".ods",) + + def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs): + engine_kwargs["engine"] = engine + + if mode == "a": + raise ValueError("Append mode is not supported with odf!") + + super().__init__(path, mode=mode, **engine_kwargs) + + if encoding is None: + encoding = "ascii" + self.book = OpenDocumentSpreadsheet() +# self.fm_datetime = xlwt.easyxf(num_format_str=self.datetime_format) +# self.fm_date = xlwt.easyxf(num_format_str=self.date_format) + + def save(self): + """ + Save workbook to disk. + """ + for sheet in self.sheets.values(): + self.book.spreadsheet.addElement(sheet) + return self.book.save(self.path) + + def write_cells( + self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None + ): + # Write the frame cells using odf + + sheet_name = self._get_sheet_name(sheet_name) + + if sheet_name in self.sheets: + wks = self.sheets[sheet_name] + else: + wks = Table(name=sheet_name) +# wks = self.book.add_sheet(sheet_name) (do at the end or immediately? FIXME) + self.sheets[sheet_name] = wks + +# if _validate_freeze_panes(freeze_panes): +# wks.set_panes_frozen(True) +# wks.set_horz_split_pos(freeze_panes[0]) +# wks.set_vert_split_pos(freeze_panes[1]) + + style_dict = {} + + rows = defaultdict(TableRow) + + for cell in cells: + print(cell.row, cell.col, cell.val) + class_to_cell_type = { str: "string", int: "float", float: "float", bool: "boolean" } + val, fmt = self._value_with_fmt(cell.val) + tc = TableCell(valuetype=class_to_cell_type[type(val)], value=val) + rows[cell.row].addElement(tc) + p = P(text=val) + tc.addElement(p) + """ + stylekey = json.dumps(cell.style) + if fmt: + stylekey += fmt + + if stylekey in style_dict: + style = style_dict[stylekey] + else: + style = self._convert_to_style(cell.style, fmt) + style_dict[stylekey] = style + + if cell.mergestart is not None and cell.mergeend is not None: + wks.write_merge( + startrow + cell.row, + startrow + cell.mergestart, + startcol + cell.col, + startcol + cell.mergeend, + val, + style, + ) + else: + wks.write(startrow + cell.row, startcol + cell.col, val, style) + """ + for row in rows.values(): + wks.addElement(row) + + @classmethod + def _style_to_xlwt( + cls, item, firstlevel: bool = True, field_sep=",", line_sep=";" + ) -> str: + """ + helper which recursively generate an xlwt easy style string + for example: + + hstyle = {"font": {"bold": True}, + "border": {"top": "thin", + "right": "thin", + "bottom": "thin", + "left": "thin"}, + "align": {"horiz": "center"}} + will be converted to + font: bold on; \ + border: top thin, right thin, bottom thin, left thin; \ + align: horiz center; + """ + if hasattr(item, "items"): + if firstlevel: + it = [ + f"{key}: {cls._style_to_xlwt(value, False)}" + for key, value in item.items() + ] + out = f"{(line_sep).join(it)} " + return out + else: + it = [ + f"{key} {cls._style_to_xlwt(value, False)}" + for key, value in item.items() + ] + out = f"{(field_sep).join(it)} " + return out + else: + item = f"{item}" + item = item.replace("True", "on") + item = item.replace("False", "off") + return item + + @classmethod + def _convert_to_style(cls, style_dict, num_format_str=None): + """ + converts a style_dict to an xlwt style object + + Parameters + ---------- + style_dict : style dictionary to convert + num_format_str : optional number format string + """ + import xlwt + + if style_dict: + xlwt_stylestr = cls._style_to_xlwt(style_dict) + style = xlwt.easyxf(xlwt_stylestr, field_sep=",", line_sep=";") + else: + style = xlwt.XFStyle() + if num_format_str is not None: + style.num_format_str = num_format_str + + return style diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index 7c8e1abb497bc..bda587194c374 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -35,7 +35,7 @@ def _get_default_writer(ext): str The default engine for the extension. """ - _default_writers = {"xlsx": "openpyxl", "xlsm": "openpyxl", "xls": "xlwt"} + _default_writers = {"xlsx": "openpyxl", "xlsm": "openpyxl", "xls": "xlwt", "ods": "odf"} xlsxwriter = import_optional_dependency( "xlsxwriter", raise_on_missing=False, on_version="warn" ) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index ba759c7766fa5..d9c4b92f58be9 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -18,6 +18,7 @@ _OpenpyxlWriter, _XlsxWriter, _XlwtWriter, + _ODSWriter, register_writer, ) @@ -49,7 +50,8 @@ def set_engine(engine, ext): @td.skip_if_no("xlrd") -@pytest.mark.parametrize("ext", [".xls", ".xlsx", ".xlsm"]) +@td.skip_if_no("odf") +@pytest.mark.parametrize("ext", [".xls", ".xlsx", ".xlsm", ".ods"]) class TestRoundTrip: @td.skip_if_no("xlwt") @td.skip_if_no("openpyxl") @@ -297,6 +299,7 @@ def test_multiindex_interval_datetimes(self, ext): @td.skip_if_no("xlrd") +@td.skip_if_no("odf") @pytest.mark.parametrize( "engine,ext", [ @@ -304,6 +307,7 @@ def test_multiindex_interval_datetimes(self, ext): pytest.param("openpyxl", ".xlsm", marks=td.skip_if_no("openpyxl")), pytest.param("xlwt", ".xls", marks=td.skip_if_no("xlwt")), pytest.param("xlsxwriter", ".xlsx", marks=td.skip_if_no("xlsxwriter")), + pytest.param("odf", ".ods"), ], ) @pytest.mark.usefixtures("set_engine") From 165d887eeb4e1e6dbf4119fab60cb5344f362099 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sat, 21 Mar 2020 14:47:38 +0100 Subject: [PATCH 02/40] Create empty cells where needed --- pandas/io/excel/_odswriter.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 0d8e828203e4e..5451e7d419dae 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -57,13 +57,19 @@ def write_cells( style_dict = {} rows = defaultdict(TableRow) + col_count = defaultdict(int) for cell in cells: print(cell.row, cell.col, cell.val) + # fill with empty cells if needed + for _ in range(cell.col - col_count[cell.row]): + rows[cell.row].addElement(TableCell()) + col_count[cell.row] += 1 class_to_cell_type = { str: "string", int: "float", float: "float", bool: "boolean" } val, fmt = self._value_with_fmt(cell.val) tc = TableCell(valuetype=class_to_cell_type[type(val)], value=val) rows[cell.row].addElement(tc) + col_count[cell.row] += 1 p = P(text=val) tc.addElement(p) """ From 024cb2d2714924efc77208a7e2487b0cb2445456 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sat, 21 Mar 2020 16:46:00 +0100 Subject: [PATCH 03/40] Add support for dates --- pandas/io/excel/_odfreader.py | 15 ++++++--- pandas/io/excel/_odswriter.py | 47 +++++++++++++++++++-------- pandas/io/excel/_util.py | 7 +++- pandas/tests/io/excel/test_writers.py | 13 +++++--- 4 files changed, 60 insertions(+), 22 deletions(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index be86b57ca2066..f9a998d24bddc 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -118,6 +118,7 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: if len(row) < max_row_len: row.extend([self.empty_value] * (max_row_len - len(row))) + # print(table) return table def _get_row_repeat(self, row) -> int: @@ -148,7 +149,11 @@ def _is_empty_row(self, row) -> bool: def _get_cell_value(self, cell, convert_float: bool) -> Scalar: from odf.namespaces import OFFICENS + # print("cell: ", cell, convert_float) + cell_type = cell.attributes.get((OFFICENS, "value-type")) + cell_value = cell.attributes.get((OFFICENS, "value")) + # print("type=", cell_type, "value=", repr(cell_value)) if cell_type == "boolean": if str(cell) == "TRUE": return True @@ -157,14 +162,16 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar: return self.empty_value elif cell_type == "float": # GH5394 - cell_value = float(cell.attributes.get((OFFICENS, "value"))) - - if cell_value == 0.0: # NA handling - return str(cell) + value = cell.attributes.get((OFFICENS, "value")) + if value == "": # NA handling + return "" + cell_value = float(cell_value) if convert_float: + # print("convert", cell_value, int(cell_value)) val = int(cell_value) if val == cell_value: + # print("return the int") return val return cell_value elif cell_type == "percentage": diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 5451e7d419dae..68d5091351f39 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -1,14 +1,16 @@ from collections import defaultdict +import datetime -import pandas._libs.json as json +# import pandas._libs.json as json from pandas.io.excel._base import ExcelWriter -from pandas.io.excel._util import _validate_freeze_panes +# from pandas.io.excel._util import _validate_freeze_panes from odf.opendocument import OpenDocumentSpreadsheet from odf.table import Table, TableRow, TableCell from odf.text import P + class _ODSWriter(ExcelWriter): engine = "odf" supported_extensions = (".ods",) @@ -24,8 +26,9 @@ def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs): if encoding is None: encoding = "ascii" self.book = OpenDocumentSpreadsheet() -# self.fm_datetime = xlwt.easyxf(num_format_str=self.datetime_format) -# self.fm_date = xlwt.easyxf(num_format_str=self.date_format) + + # self.fm_datetime = xlwt.easyxf(num_format_str=self.datetime_format) + # self.fm_date = xlwt.easyxf(num_format_str=self.date_format) def save(self): """ @@ -46,13 +49,13 @@ def write_cells( wks = self.sheets[sheet_name] else: wks = Table(name=sheet_name) -# wks = self.book.add_sheet(sheet_name) (do at the end or immediately? FIXME) + # wks = self.book.add_sheet(sheet_name) (do at the end or immediately? FIXME) self.sheets[sheet_name] = wks -# if _validate_freeze_panes(freeze_panes): -# wks.set_panes_frozen(True) -# wks.set_horz_split_pos(freeze_panes[0]) -# wks.set_vert_split_pos(freeze_panes[1]) + # if _validate_freeze_panes(freeze_panes): + # wks.set_panes_frozen(True) + # wks.set_horz_split_pos(freeze_panes[0]) + # wks.set_vert_split_pos(freeze_panes[1]) style_dict = {} @@ -60,17 +63,35 @@ def write_cells( col_count = defaultdict(int) for cell in cells: - print(cell.row, cell.col, cell.val) + # print(cell.row, cell.col, cell.val) # fill with empty cells if needed for _ in range(cell.col - col_count[cell.row]): rows[cell.row].addElement(TableCell()) col_count[cell.row] += 1 - class_to_cell_type = { str: "string", int: "float", float: "float", bool: "boolean" } + class_to_cell_type = { + str: "string", + int: "float", + float: "float", + bool: "boolean", + } val, fmt = self._value_with_fmt(cell.val) - tc = TableCell(valuetype=class_to_cell_type[type(val)], value=val) + # print("type", type(val), "value", val) + value = val + if isinstance(val, bool): + value = str(val).lower() + # if isinstance(val, datetime.date): + # tc = TableCell(valuetype="date", + if isinstance(val, datetime.date): + print('date', val.strftime("%Y-%m-%d"), val.strftime("%x")) + value = val.strftime("%Y-%m-%d") + tc = TableCell(valuetype="date", datevalue=value) + else: + tc = TableCell(valuetype=class_to_cell_type[type(val)], value=value) rows[cell.row].addElement(tc) col_count[cell.row] += 1 - p = P(text=val) + if isinstance(val, bool): + value = str(val).upper() + p = P(text=value) tc.addElement(p) """ stylekey = json.dumps(cell.style) diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index bda587194c374..285aeaf7d4c6e 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -35,7 +35,12 @@ def _get_default_writer(ext): str The default engine for the extension. """ - _default_writers = {"xlsx": "openpyxl", "xlsm": "openpyxl", "xls": "xlwt", "ods": "odf"} + _default_writers = { + "xlsx": "openpyxl", + "xlsm": "openpyxl", + "xls": "xlwt", + "ods": "odf", + } xlsxwriter = import_optional_dependency( "xlsxwriter", raise_on_missing=False, on_version="warn" ) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index d9c4b92f58be9..c250598dc5769 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -330,7 +330,7 @@ def test_excel_sheet_size(self, path): with pytest.raises(ValueError, match=msg): col_df.to_excel(path) - def test_excel_sheet_by_name_raise(self, path): + def test_excel_sheet_by_name_raise(self, path, engine): import xlrd gt = DataFrame(np.random.randn(10, 2)) @@ -341,9 +341,14 @@ def test_excel_sheet_by_name_raise(self, path): tm.assert_frame_equal(gt, df) - msg = "No sheet named <'0'>" - with pytest.raises(xlrd.XLRDError, match=msg): - pd.read_excel(xl, sheet_name="0") + if engine == "odf": + msg = "sheet 0 not found" + with pytest.raises(ValueError, match=msg): + pd.read_excel(xl, "0") + else: + msg = "No sheet named <'0'>" + with pytest.raises(xlrd.XLRDError, match=msg): + pd.read_excel(xl, sheet_name="0") def test_excel_writer_context_manager(self, frame, path): with ExcelWriter(path) as writer: From 341b77c89feb33f2777185f006fd93c8db778631 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sat, 21 Mar 2020 17:50:21 +0100 Subject: [PATCH 04/40] More date/datetime fixes --- pandas/io/excel/_odswriter.py | 37 ++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 68d5091351f39..2b26b71c2c363 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -68,30 +68,39 @@ def write_cells( for _ in range(cell.col - col_count[cell.row]): rows[cell.row].addElement(TableCell()) col_count[cell.row] += 1 - class_to_cell_type = { - str: "string", - int: "float", - float: "float", - bool: "boolean", - } val, fmt = self._value_with_fmt(cell.val) - # print("type", type(val), "value", val) - value = val + print("type", type(val), "value", val) + pvalue = value = val if isinstance(val, bool): value = str(val).lower() - # if isinstance(val, datetime.date): - # tc = TableCell(valuetype="date", - if isinstance(val, datetime.date): + pvalue = str(val).upper() + if isinstance(val, datetime.datetime): + print('datetime', val.strftime("%Y-%m-%d"), val.strftime("%x")) + if val.time(): + value = val.isoformat() + pvalue = val.strftime("%c") + else: + value = val.strftime("%Y-%m-%d") + pvalue = val.strftime("%x") + tc = TableCell(valuetype="date", datevalue=value) + elif isinstance(val, datetime.date): print('date', val.strftime("%Y-%m-%d"), val.strftime("%x")) value = val.strftime("%Y-%m-%d") + pvalue = val.strftime("%x") +# value = val.isoformat() +# pvalue = val.strftime("%c") tc = TableCell(valuetype="date", datevalue=value) else: + class_to_cell_type = { + str: "string", + int: "float", + float: "float", + bool: "boolean", + } tc = TableCell(valuetype=class_to_cell_type[type(val)], value=value) rows[cell.row].addElement(tc) col_count[cell.row] += 1 - if isinstance(val, bool): - value = str(val).upper() - p = P(text=value) + p = P(text=pvalue) tc.addElement(p) """ stylekey = json.dumps(cell.style) From 1ead9f092e6ce7ef58c5fc4de6104452ec39f773 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sat, 21 Mar 2020 18:15:23 +0100 Subject: [PATCH 05/40] Make sure the cells and columns are sorted before writing them out --- pandas/io/excel/_odswriter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 2b26b71c2c363..c165e0d2af4ea 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -62,7 +62,7 @@ def write_cells( rows = defaultdict(TableRow) col_count = defaultdict(int) - for cell in cells: + for cell in sorted(cells, key=lambda cell: (cell.row, cell.col)): # print(cell.row, cell.col, cell.val) # fill with empty cells if needed for _ in range(cell.col - col_count[cell.row]): From df321b6b1f2a35f264cfbb476b3e452b0d8a8592 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sat, 21 Mar 2020 21:46:59 +0100 Subject: [PATCH 06/40] Pass explicit engine for reading ods files --- pandas/tests/io/excel/test_writers.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index c250598dc5769..4a0adbce8e575 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1202,7 +1202,9 @@ def test_bytes_io(self, engine): writer.save() bio.seek(0) - reread_df = pd.read_excel(bio, index_col=0) + if engine != "odf": + engine = None + reread_df = pd.read_excel(bio, engine=engine, index_col=0) tm.assert_frame_equal(df, reread_df) def test_write_lists_dict(self, path): @@ -1254,16 +1256,20 @@ def test_path_path_lib(self, engine, ext): df = tm.makeDataFrame() writer = partial(df.to_excel, engine=engine) - reader = partial(pd.read_excel, index_col=0) - result = tm.round_trip_pathlib(writer, reader, path=f"foo.{ext}") + if engine != "odf": + engine = None + reader = partial(pd.read_excel, engine=engine, index_col=0) + result = tm.round_trip_pathlib(writer, reader, path=f"foo{ext}") tm.assert_frame_equal(result, df) def test_path_local_path(self, engine, ext): df = tm.makeDataFrame() writer = partial(df.to_excel, engine=engine) - reader = partial(pd.read_excel, index_col=0) - result = tm.round_trip_pathlib(writer, reader, path=f"foo.{ext}") + if engine != "odf": + engine = None + reader = partial(pd.read_excel, engine=engine, index_col=0) + result = tm.round_trip_localpath(writer, reader, path=f"foo{ext}") tm.assert_frame_equal(result, df) def test_merged_cell_custom_objects(self, merge_cells, path): From fbc5b3ebbfdbda8a3200a19133f2de0733dba30a Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sat, 21 Mar 2020 21:47:47 +0100 Subject: [PATCH 07/40] Only check extensions when there is a file with an extension --- pandas/io/excel/_base.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 85a70b7efc00d..371fbaeccdd87 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -692,10 +692,7 @@ def __init__( # validate that this engine can handle the extension if isinstance(path, str): ext = os.path.splitext(path)[-1] - else: - ext = "xls" if engine == "xlwt" else "xlsx" - - self.check_extension(ext) + self.check_extension(ext) self.path = path self.sheets = {} From 1303b85e69152f2a5f097258e13a6d0abac83144 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sat, 21 Mar 2020 22:29:01 +0100 Subject: [PATCH 08/40] Fix #N/A handling --- pandas/io/excel/_odfreader.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index f9a998d24bddc..9b25b4a83026f 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -1,5 +1,6 @@ from typing import List, cast +import numpy as np from pandas._typing import FilePathOrBuffer, Scalar from pandas.compat._optional import import_optional_dependency @@ -149,10 +150,11 @@ def _is_empty_row(self, row) -> bool: def _get_cell_value(self, cell, convert_float: bool) -> Scalar: from odf.namespaces import OFFICENS - # print("cell: ", cell, convert_float) + # print("\ncell: ", cell, convert_float) + if str(cell) == "#N/A": + return np.nan cell_type = cell.attributes.get((OFFICENS, "value-type")) - cell_value = cell.attributes.get((OFFICENS, "value")) # print("type=", cell_type, "value=", repr(cell_value)) if cell_type == "boolean": if str(cell) == "TRUE": @@ -162,16 +164,11 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar: return self.empty_value elif cell_type == "float": # GH5394 - - value = cell.attributes.get((OFFICENS, "value")) - if value == "": # NA handling - return "" - cell_value = float(cell_value) + cell_value = float(cell.attributes.get((OFFICENS, "value"))) + # print("value = ", value) if convert_float: - # print("convert", cell_value, int(cell_value)) val = int(cell_value) if val == cell_value: - # print("return the int") return val return cell_value elif cell_type == "percentage": From 4cae5648af21ce79b9a64068c135bb8bc2760f7e Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 22 Mar 2020 13:09:01 +0100 Subject: [PATCH 09/40] Add support for merged cells and skipped rows --- pandas/io/excel/_odswriter.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index c165e0d2af4ea..dacebd8077a48 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -63,7 +63,11 @@ def write_cells( col_count = defaultdict(int) for cell in sorted(cells, key=lambda cell: (cell.row, cell.col)): - # print(cell.row, cell.col, cell.val) + attributes = {} + print(cell.row, cell.col, cell.val, cell.mergestart, cell.mergeend) + if cell.mergestart is not None and cell.mergeend is not None: + attributes = {"numberrowsspanned": max(1, cell.mergestart), + "numbercolumnsspanned": cell.mergeend} # fill with empty cells if needed for _ in range(cell.col - col_count[cell.row]): rows[cell.row].addElement(TableCell()) @@ -82,14 +86,14 @@ def write_cells( else: value = val.strftime("%Y-%m-%d") pvalue = val.strftime("%x") - tc = TableCell(valuetype="date", datevalue=value) + tc = TableCell(valuetype="date", datevalue=value, attributes=attributes) elif isinstance(val, datetime.date): print('date', val.strftime("%Y-%m-%d"), val.strftime("%x")) value = val.strftime("%Y-%m-%d") pvalue = val.strftime("%x") # value = val.isoformat() # pvalue = val.strftime("%c") - tc = TableCell(valuetype="date", datevalue=value) + tc = TableCell(valuetype="date", datevalue=value, attributes=attributes) else: class_to_cell_type = { str: "string", @@ -97,7 +101,7 @@ def write_cells( float: "float", bool: "boolean", } - tc = TableCell(valuetype=class_to_cell_type[type(val)], value=value) + tc = TableCell(valuetype=class_to_cell_type[type(val)], value=value, attributes=attributes) rows[cell.row].addElement(tc) col_count[cell.row] += 1 p = P(text=pvalue) @@ -125,8 +129,8 @@ def write_cells( else: wks.write(startrow + cell.row, startcol + cell.col, val, style) """ - for row in rows.values(): - wks.addElement(row) + for row_nr in range(max(rows.keys()) + 1): + wks.addElement(rows[row_nr]) @classmethod def _style_to_xlwt( From bd78fae52976257f458615f99e587eaa597eea2a Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 22 Mar 2020 13:52:02 +0100 Subject: [PATCH 10/40] Clean up code --- pandas/io/excel/_odswriter.py | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index dacebd8077a48..8ab565cfb37e6 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -4,6 +4,7 @@ # import pandas._libs.json as json from pandas.io.excel._base import ExcelWriter + # from pandas.io.excel._util import _validate_freeze_panes from odf.opendocument import OpenDocumentSpreadsheet @@ -49,7 +50,6 @@ def write_cells( wks = self.sheets[sheet_name] else: wks = Table(name=sheet_name) - # wks = self.book.add_sheet(sheet_name) (do at the end or immediately? FIXME) self.sheets[sheet_name] = wks # if _validate_freeze_panes(freeze_panes): @@ -66,8 +66,10 @@ def write_cells( attributes = {} print(cell.row, cell.col, cell.val, cell.mergestart, cell.mergeend) if cell.mergestart is not None and cell.mergeend is not None: - attributes = {"numberrowsspanned": max(1, cell.mergestart), - "numbercolumnsspanned": cell.mergeend} + attributes = { + "numberrowsspanned": max(1, cell.mergestart), + "numbercolumnsspanned": cell.mergeend, + } # fill with empty cells if needed for _ in range(cell.col - col_count[cell.row]): rows[cell.row].addElement(TableCell()) @@ -79,7 +81,6 @@ def write_cells( value = str(val).lower() pvalue = str(val).upper() if isinstance(val, datetime.datetime): - print('datetime', val.strftime("%Y-%m-%d"), val.strftime("%x")) if val.time(): value = val.isoformat() pvalue = val.strftime("%c") @@ -88,11 +89,8 @@ def write_cells( pvalue = val.strftime("%x") tc = TableCell(valuetype="date", datevalue=value, attributes=attributes) elif isinstance(val, datetime.date): - print('date', val.strftime("%Y-%m-%d"), val.strftime("%x")) value = val.strftime("%Y-%m-%d") pvalue = val.strftime("%x") -# value = val.isoformat() -# pvalue = val.strftime("%c") tc = TableCell(valuetype="date", datevalue=value, attributes=attributes) else: class_to_cell_type = { @@ -101,7 +99,11 @@ def write_cells( float: "float", bool: "boolean", } - tc = TableCell(valuetype=class_to_cell_type[type(val)], value=value, attributes=attributes) + tc = TableCell( + valuetype=class_to_cell_type[type(val)], + value=value, + attributes=attributes, + ) rows[cell.row].addElement(tc) col_count[cell.row] += 1 p = P(text=pvalue) @@ -116,18 +118,6 @@ def write_cells( else: style = self._convert_to_style(cell.style, fmt) style_dict[stylekey] = style - - if cell.mergestart is not None and cell.mergeend is not None: - wks.write_merge( - startrow + cell.row, - startrow + cell.mergestart, - startcol + cell.col, - startcol + cell.mergeend, - val, - style, - ) - else: - wks.write(startrow + cell.row, startcol + cell.col, val, style) """ for row_nr in range(max(rows.keys()) + 1): wks.addElement(rows[row_nr]) From 5b9427ff373df6d7c32ee6b7d36569cbcd179819 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 22 Mar 2020 15:33:26 +0100 Subject: [PATCH 11/40] Implement styling --- pandas/io/excel/_odswriter.py | 125 +++++++++++----------------------- 1 file changed, 41 insertions(+), 84 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 8ab565cfb37e6..90608fc32f074 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -1,13 +1,14 @@ from collections import defaultdict import datetime -# import pandas._libs.json as json +import pandas._libs.json as json from pandas.io.excel._base import ExcelWriter # from pandas.io.excel._util import _validate_freeze_panes from odf.opendocument import OpenDocumentSpreadsheet +from odf.style import Style, TextProperties, TableCellProperties, ParagraphProperties from odf.table import Table, TableRow, TableCell from odf.text import P @@ -24,12 +25,8 @@ def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs): super().__init__(path, mode=mode, **engine_kwargs) - if encoding is None: - encoding = "ascii" self.book = OpenDocumentSpreadsheet() - - # self.fm_datetime = xlwt.easyxf(num_format_str=self.datetime_format) - # self.fm_date = xlwt.easyxf(num_format_str=self.date_format) + self.style_dict = {} def save(self): """ @@ -57,25 +54,25 @@ def write_cells( # wks.set_horz_split_pos(freeze_panes[0]) # wks.set_vert_split_pos(freeze_panes[1]) - style_dict = {} rows = defaultdict(TableRow) col_count = defaultdict(int) for cell in sorted(cells, key=lambda cell: (cell.row, cell.col)): attributes = {} + style_name = self._process_style(cell.style) + if style_name is not None: + attributes["stylename"] = style_name print(cell.row, cell.col, cell.val, cell.mergestart, cell.mergeend) if cell.mergestart is not None and cell.mergeend is not None: - attributes = { - "numberrowsspanned": max(1, cell.mergestart), - "numbercolumnsspanned": cell.mergeend, - } + attributes["numberrowsspanned"] = max(1, cell.mergestart) + attributes["numbercolumnsspanned"] = cell.mergeend # fill with empty cells if needed for _ in range(cell.col - col_count[cell.row]): rows[cell.row].addElement(TableCell()) col_count[cell.row] += 1 val, fmt = self._value_with_fmt(cell.val) - print("type", type(val), "value", val) + # print("type", type(val), "value", val) pvalue = value = val if isinstance(val, bool): value = str(val).lower() @@ -108,78 +105,38 @@ def write_cells( col_count[cell.row] += 1 p = P(text=pvalue) tc.addElement(p) - """ - stylekey = json.dumps(cell.style) - if fmt: - stylekey += fmt - - if stylekey in style_dict: - style = style_dict[stylekey] - else: - style = self._convert_to_style(cell.style, fmt) - style_dict[stylekey] = style - """ for row_nr in range(max(rows.keys()) + 1): wks.addElement(rows[row_nr]) - @classmethod - def _style_to_xlwt( - cls, item, firstlevel: bool = True, field_sep=",", line_sep=";" - ) -> str: - """ - helper which recursively generate an xlwt easy style string - for example: - - hstyle = {"font": {"bold": True}, - "border": {"top": "thin", - "right": "thin", - "bottom": "thin", - "left": "thin"}, - "align": {"horiz": "center"}} - will be converted to - font: bold on; \ - border: top thin, right thin, bottom thin, left thin; \ - align: horiz center; - """ - if hasattr(item, "items"): - if firstlevel: - it = [ - f"{key}: {cls._style_to_xlwt(value, False)}" - for key, value in item.items() - ] - out = f"{(line_sep).join(it)} " - return out - else: - it = [ - f"{key} {cls._style_to_xlwt(value, False)}" - for key, value in item.items() - ] - out = f"{(field_sep).join(it)} " - return out - else: - item = f"{item}" - item = item.replace("True", "on") - item = item.replace("False", "off") - return item - - @classmethod - def _convert_to_style(cls, style_dict, num_format_str=None): - """ - converts a style_dict to an xlwt style object - - Parameters - ---------- - style_dict : style dictionary to convert - num_format_str : optional number format string - """ - import xlwt - - if style_dict: - xlwt_stylestr = cls._style_to_xlwt(style_dict) - style = xlwt.easyxf(xlwt_stylestr, field_sep=",", line_sep=";") - else: - style = xlwt.XFStyle() - if num_format_str is not None: - style.num_format_str = num_format_str - - return style + def _process_style(self, style): + if style is None: + return None + style_key = json.dumps(style) + if style_key in self.style_dict: + return self.style_dict[style_key] + name = f"pd{len(self.style_dict)+1}" + self.style_dict[style_key] = name + odf_style = Style(name=name, family="table-cell") + if "font" in style: + font = style["font"] + if font.get("bold", False): + odf_style.addElement(TextProperties(fontweight="bold")) + if "borders" in style: + borders = style["borders"] + for side, thickness in borders.items(): + thickness_translation = { + "thin": "0.75pt solid #000000" + } + odf_style.addElement( + TableCellProperties( + attributes={f"border{side}": thickness_translation[thickness]})) + if "alignment" in style: + alignment = style["alignment"] + horizontal = alignment.get("horizontal") + if horizontal: + odf_style.addElement(ParagraphProperties(textalign=horizontal)) + vertical = alignment.get("vertical") + if vertical: + odf_style.addElement(TableCellProperties(verticalalign=vertical)) + self.book.styles.addElement(odf_style) + return name From 3a8a06bc3f002a3343e6661138c299bc98c84ee2 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 22 Mar 2020 20:29:31 +0100 Subject: [PATCH 12/40] Refactor a bit to make a bit more readable --- pandas/io/excel/_odswriter.py | 136 ++++++++++++++++++++++------------ 1 file changed, 88 insertions(+), 48 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 90608fc32f074..11d15634a0dd1 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -5,12 +5,13 @@ from pandas.io.excel._base import ExcelWriter -# from pandas.io.excel._util import _validate_freeze_panes +from pandas.io.excel._util import _validate_freeze_panes from odf.opendocument import OpenDocumentSpreadsheet from odf.style import Style, TextProperties, TableCellProperties, ParagraphProperties from odf.table import Table, TableRow, TableCell from odf.text import P +from odf.config import ConfigItemSet, ConfigItemMapEntry, ConfigItemMapNamed, ConfigItem, ConfigItemMapIndexed class _ODSWriter(ExcelWriter): @@ -36,11 +37,11 @@ def save(self): self.book.spreadsheet.addElement(sheet) return self.book.save(self.path) - def write_cells( - self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None - ): + def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, + freeze_panes=None): # Write the frame cells using odf - + # assert startrow == 0 + # assert startcol == 0 sheet_name = self._get_sheet_name(sheet_name) if sheet_name in self.sheets: @@ -49,65 +50,69 @@ def write_cells( wks = Table(name=sheet_name) self.sheets[sheet_name] = wks - # if _validate_freeze_panes(freeze_panes): - # wks.set_panes_frozen(True) - # wks.set_horz_split_pos(freeze_panes[0]) - # wks.set_vert_split_pos(freeze_panes[1]) - + if _validate_freeze_panes(freeze_panes): + self._create_freeze_panes(sheet_name, freeze_panes) rows = defaultdict(TableRow) col_count = defaultdict(int) for cell in sorted(cells, key=lambda cell: (cell.row, cell.col)): - attributes = {} - style_name = self._process_style(cell.style) - if style_name is not None: - attributes["stylename"] = style_name - print(cell.row, cell.col, cell.val, cell.mergestart, cell.mergeend) - if cell.mergestart is not None and cell.mergeend is not None: - attributes["numberrowsspanned"] = max(1, cell.mergestart) - attributes["numbercolumnsspanned"] = cell.mergeend # fill with empty cells if needed for _ in range(cell.col - col_count[cell.row]): rows[cell.row].addElement(TableCell()) col_count[cell.row] += 1 - val, fmt = self._value_with_fmt(cell.val) - # print("type", type(val), "value", val) - pvalue = value = val - if isinstance(val, bool): - value = str(val).lower() - pvalue = str(val).upper() - if isinstance(val, datetime.datetime): - if val.time(): - value = val.isoformat() - pvalue = val.strftime("%c") - else: - value = val.strftime("%Y-%m-%d") - pvalue = val.strftime("%x") - tc = TableCell(valuetype="date", datevalue=value, attributes=attributes) - elif isinstance(val, datetime.date): - value = val.strftime("%Y-%m-%d") - pvalue = val.strftime("%x") - tc = TableCell(valuetype="date", datevalue=value, attributes=attributes) - else: - class_to_cell_type = { - str: "string", - int: "float", - float: "float", - bool: "boolean", - } - tc = TableCell( - valuetype=class_to_cell_type[type(val)], - value=value, - attributes=attributes, - ) + + pvalue, tc = self._make_table_cell(cell) rows[cell.row].addElement(tc) col_count[cell.row] += 1 p = P(text=pvalue) tc.addElement(p) + + # add all rows to the sheet for row_nr in range(max(rows.keys()) + 1): wks.addElement(rows[row_nr]) + def _make_table_cell_attributes(self, cell): + attributes = {} + style_name = self._process_style(cell.style) + if style_name is not None: + attributes["stylename"] = style_name + if cell.mergestart is not None and cell.mergeend is not None: + attributes["numberrowsspanned"] = max(1, cell.mergestart) + attributes["numbercolumnsspanned"] = cell.mergeend + return attributes + + def _make_table_cell(self, cell): + attributes = self._make_table_cell_attributes(cell) + val, fmt = self._value_with_fmt(cell.val) + pvalue = value = val + if isinstance(val, bool): + value = str(val).lower() + pvalue = str(val).upper() + if isinstance(val, datetime.datetime): + if val.time(): + value = val.isoformat() + pvalue = val.strftime("%c") + else: + value = val.strftime("%Y-%m-%d") + pvalue = val.strftime("%x") + return pvalue, TableCell(valuetype="date", datevalue=value, + attributes=attributes) + elif isinstance(val, datetime.date): + value = val.strftime("%Y-%m-%d") + pvalue = val.strftime("%x") + return pvalue, TableCell(valuetype="date", datevalue=value, + attributes=attributes) + else: + class_to_cell_type = { + str: "string", + int: "float", + float: "float", + bool: "boolean", + } + return pvalue, TableCell(valuetype=class_to_cell_type[type(val)], + value=value, attributes=attributes) + def _process_style(self, style): if style is None: return None @@ -140,3 +145,38 @@ def _process_style(self, style): odf_style.addElement(TableCellProperties(verticalalign=vertical)) self.book.styles.addElement(odf_style) return name + + def _create_freeze_panes(self, sheet_name, freeze_panes): + config_item_set = ConfigItemSet(name="ooo:view-settings") + self.book.settings.addElement(config_item_set) + + config_item_map_indexed = ConfigItemMapIndexed(name="Views") + config_item_set.addElement(config_item_map_indexed) + + config_item_map_entry = ConfigItemMapEntry() + config_item_map_indexed.addElement(config_item_map_entry) + + config_item_map_named = ConfigItemMapNamed(name="Tables") + config_item_map_entry.addElement(config_item_map_named) + + config_item_map_entry = ConfigItemMapEntry(name=sheet_name) + config_item_map_named.addElement(config_item_map_entry) + + config_item_map_entry.addElement(ConfigItem(name="HorizontalSplitMode", + type="short", + text="2")) + config_item_map_entry.addElement(ConfigItem(name="VerticalSplitMode", + type="short", + text="2")) + config_item_map_entry.addElement(ConfigItem(name="HorizontalSplitPosition", + type="int", + text=str(freeze_panes[0]))) + config_item_map_entry.addElement(ConfigItem(name="VerticalSplitPosition", + type="int", + text=str(freeze_panes[1]))) + config_item_map_entry.addElement(ConfigItem(name="PositionRight", + type="int", + text=str(freeze_panes[0]))) + config_item_map_entry.addElement(ConfigItem(name="PositionBottom", + type="int", + text=str(freeze_panes[1]))) From 4d6ca30dde9e903352a363a8484645c13486bebd Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 22 Mar 2020 20:30:39 +0100 Subject: [PATCH 13/40] black reformatting --- pandas/io/excel/_odswriter.py | 83 ++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 31 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 11d15634a0dd1..2045e3c7e9be3 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -11,7 +11,13 @@ from odf.style import Style, TextProperties, TableCellProperties, ParagraphProperties from odf.table import Table, TableRow, TableCell from odf.text import P -from odf.config import ConfigItemSet, ConfigItemMapEntry, ConfigItemMapNamed, ConfigItem, ConfigItemMapIndexed +from odf.config import ( + ConfigItemSet, + ConfigItemMapEntry, + ConfigItemMapNamed, + ConfigItem, + ConfigItemMapIndexed, +) class _ODSWriter(ExcelWriter): @@ -37,8 +43,9 @@ def save(self): self.book.spreadsheet.addElement(sheet) return self.book.save(self.path) - def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, - freeze_panes=None): + def write_cells( + self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None + ): # Write the frame cells using odf # assert startrow == 0 # assert startcol == 0 @@ -96,13 +103,17 @@ def _make_table_cell(self, cell): else: value = val.strftime("%Y-%m-%d") pvalue = val.strftime("%x") - return pvalue, TableCell(valuetype="date", datevalue=value, - attributes=attributes) + return ( + pvalue, + TableCell(valuetype="date", datevalue=value, attributes=attributes), + ) elif isinstance(val, datetime.date): value = val.strftime("%Y-%m-%d") pvalue = val.strftime("%x") - return pvalue, TableCell(valuetype="date", datevalue=value, - attributes=attributes) + return ( + pvalue, + TableCell(valuetype="date", datevalue=value, attributes=attributes), + ) else: class_to_cell_type = { str: "string", @@ -110,8 +121,14 @@ def _make_table_cell(self, cell): float: "float", bool: "boolean", } - return pvalue, TableCell(valuetype=class_to_cell_type[type(val)], - value=value, attributes=attributes) + return ( + pvalue, + TableCell( + valuetype=class_to_cell_type[type(val)], + value=value, + attributes=attributes, + ), + ) def _process_style(self, style): if style is None: @@ -129,12 +146,12 @@ def _process_style(self, style): if "borders" in style: borders = style["borders"] for side, thickness in borders.items(): - thickness_translation = { - "thin": "0.75pt solid #000000" - } + thickness_translation = {"thin": "0.75pt solid #000000"} odf_style.addElement( TableCellProperties( - attributes={f"border{side}": thickness_translation[thickness]})) + attributes={f"border{side}": thickness_translation[thickness]} + ) + ) if "alignment" in style: alignment = style["alignment"] horizontal = alignment.get("horizontal") @@ -162,21 +179,25 @@ def _create_freeze_panes(self, sheet_name, freeze_panes): config_item_map_entry = ConfigItemMapEntry(name=sheet_name) config_item_map_named.addElement(config_item_map_entry) - config_item_map_entry.addElement(ConfigItem(name="HorizontalSplitMode", - type="short", - text="2")) - config_item_map_entry.addElement(ConfigItem(name="VerticalSplitMode", - type="short", - text="2")) - config_item_map_entry.addElement(ConfigItem(name="HorizontalSplitPosition", - type="int", - text=str(freeze_panes[0]))) - config_item_map_entry.addElement(ConfigItem(name="VerticalSplitPosition", - type="int", - text=str(freeze_panes[1]))) - config_item_map_entry.addElement(ConfigItem(name="PositionRight", - type="int", - text=str(freeze_panes[0]))) - config_item_map_entry.addElement(ConfigItem(name="PositionBottom", - type="int", - text=str(freeze_panes[1]))) + config_item_map_entry.addElement( + ConfigItem(name="HorizontalSplitMode", type="short", text="2") + ) + config_item_map_entry.addElement( + ConfigItem(name="VerticalSplitMode", type="short", text="2") + ) + config_item_map_entry.addElement( + ConfigItem( + name="HorizontalSplitPosition", type="int", text=str(freeze_panes[0]) + ) + ) + config_item_map_entry.addElement( + ConfigItem( + name="VerticalSplitPosition", type="int", text=str(freeze_panes[1]) + ) + ) + config_item_map_entry.addElement( + ConfigItem(name="PositionRight", type="int", text=str(freeze_panes[0])) + ) + config_item_map_entry.addElement( + ConfigItem(name="PositionBottom", type="int", text=str(freeze_panes[1])) + ) From 736ec57104453f4886aa5add961c7c4b6fceff51 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 22 Mar 2020 21:03:16 +0100 Subject: [PATCH 14/40] flake8 and isort fixes --- pandas/io/excel/__init__.py | 2 +- pandas/io/excel/_odfreader.py | 1 + pandas/io/excel/_odswriter.py | 25 ++++++++++++------------- pandas/tests/io/excel/test_writers.py | 1 - 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/pandas/io/excel/__init__.py b/pandas/io/excel/__init__.py index 26d152106b47f..d035223957a76 100644 --- a/pandas/io/excel/__init__.py +++ b/pandas/io/excel/__init__.py @@ -1,9 +1,9 @@ from pandas.io.excel._base import ExcelFile, ExcelWriter, read_excel +from pandas.io.excel._odswriter import _ODSWriter from pandas.io.excel._openpyxl import _OpenpyxlWriter from pandas.io.excel._util import register_writer from pandas.io.excel._xlsxwriter import _XlsxWriter from pandas.io.excel._xlwt import _XlwtWriter -from pandas.io.excel._odswriter import _ODSWriter __all__ = ["read_excel", "ExcelWriter", "ExcelFile"] diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 9b25b4a83026f..a7403a5289eae 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -1,6 +1,7 @@ from typing import List, cast import numpy as np + from pandas._typing import FilePathOrBuffer, Scalar from pandas.compat._optional import import_optional_dependency diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 2045e3c7e9be3..511cba40484a2 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -1,23 +1,22 @@ from collections import defaultdict import datetime -import pandas._libs.json as json - -from pandas.io.excel._base import ExcelWriter - -from pandas.io.excel._util import _validate_freeze_panes - -from odf.opendocument import OpenDocumentSpreadsheet -from odf.style import Style, TextProperties, TableCellProperties, ParagraphProperties -from odf.table import Table, TableRow, TableCell -from odf.text import P from odf.config import ( - ConfigItemSet, - ConfigItemMapEntry, - ConfigItemMapNamed, ConfigItem, + ConfigItemMapEntry, ConfigItemMapIndexed, + ConfigItemMapNamed, + ConfigItemSet, ) +from odf.opendocument import OpenDocumentSpreadsheet +from odf.style import ParagraphProperties, Style, TableCellProperties, TextProperties +from odf.table import Table, TableCell, TableRow +from odf.text import P + +import pandas._libs.json as json + +from pandas.io.excel._base import ExcelWriter +from pandas.io.excel._util import _validate_freeze_panes class _ODSWriter(ExcelWriter): diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 4a0adbce8e575..2ada04179daae 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -18,7 +18,6 @@ _OpenpyxlWriter, _XlsxWriter, _XlwtWriter, - _ODSWriter, register_writer, ) From 9458d4fabb04fc0e991065809447235465e7ec6d Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 22 Mar 2020 22:48:05 +0100 Subject: [PATCH 15/40] Typing validation fixes --- pandas/io/excel/_odswriter.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 511cba40484a2..d5c919060ed67 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -1,5 +1,6 @@ from collections import defaultdict import datetime +from typing import DefaultDict from odf.config import ( ConfigItem, @@ -32,7 +33,7 @@ def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs): super().__init__(path, mode=mode, **engine_kwargs) self.book = OpenDocumentSpreadsheet() - self.style_dict = {} + self._style_dict = {} def save(self): """ @@ -59,8 +60,8 @@ def write_cells( if _validate_freeze_panes(freeze_panes): self._create_freeze_panes(sheet_name, freeze_panes) - rows = defaultdict(TableRow) - col_count = defaultdict(int) + rows: DefautDict = defaultdict(TableRow) + col_count: DefaultDict = defaultdict(int) for cell in sorted(cells, key=lambda cell: (cell.row, cell.col)): # fill with empty cells if needed @@ -133,10 +134,10 @@ def _process_style(self, style): if style is None: return None style_key = json.dumps(style) - if style_key in self.style_dict: - return self.style_dict[style_key] - name = f"pd{len(self.style_dict)+1}" - self.style_dict[style_key] = name + if style_key in self._style_dict: + return self._style_dict[style_key] + name = f"pd{len(self._style_dict)+1}" + self._style_dict[style_key] = name odf_style = Style(name=name, family="table-cell") if "font" in style: font = style["font"] From ac0c96f339e7d54b7e76062cc9c0a3655f20be97 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 22 Mar 2020 22:49:56 +0100 Subject: [PATCH 16/40] Fix typo in type annotation --- pandas/io/excel/_odswriter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index d5c919060ed67..20caa0be9f879 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -60,7 +60,7 @@ def write_cells( if _validate_freeze_panes(freeze_panes): self._create_freeze_panes(sheet_name, freeze_panes) - rows: DefautDict = defaultdict(TableRow) + rows: DefaultDict = defaultdict(TableRow) col_count: DefaultDict = defaultdict(int) for cell in sorted(cells, key=lambda cell: (cell.row, cell.col)): From 1fdabc697e4ce2f0b81c7da48394ee46a2b5a43f Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 22 Mar 2020 22:55:16 +0100 Subject: [PATCH 17/40] Remove commented out debug code --- pandas/io/excel/_odfreader.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index a7403a5289eae..85ec9afaaec25 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -120,7 +120,6 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: if len(row) < max_row_len: row.extend([self.empty_value] * (max_row_len - len(row))) - # print(table) return table def _get_row_repeat(self, row) -> int: @@ -151,12 +150,10 @@ def _is_empty_row(self, row) -> bool: def _get_cell_value(self, cell, convert_float: bool) -> Scalar: from odf.namespaces import OFFICENS - # print("\ncell: ", cell, convert_float) if str(cell) == "#N/A": return np.nan cell_type = cell.attributes.get((OFFICENS, "value-type")) - # print("type=", cell_type, "value=", repr(cell_value)) if cell_type == "boolean": if str(cell) == "TRUE": return True @@ -166,7 +163,6 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar: elif cell_type == "float": # GH5394 cell_value = float(cell.attributes.get((OFFICENS, "value"))) - # print("value = ", value) if convert_float: val = int(cell_value) if val == cell_value: From 149e1c58cec2bd9103baa1b113971c5fcf320329 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 22 Mar 2020 23:18:41 +0100 Subject: [PATCH 18/40] Move imports inside methods --- pandas/io/excel/_odswriter.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 20caa0be9f879..1a9d80da7518f 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -2,18 +2,6 @@ import datetime from typing import DefaultDict -from odf.config import ( - ConfigItem, - ConfigItemMapEntry, - ConfigItemMapIndexed, - ConfigItemMapNamed, - ConfigItemSet, -) -from odf.opendocument import OpenDocumentSpreadsheet -from odf.style import ParagraphProperties, Style, TableCellProperties, TextProperties -from odf.table import Table, TableCell, TableRow -from odf.text import P - import pandas._libs.json as json from pandas.io.excel._base import ExcelWriter @@ -25,6 +13,8 @@ class _ODSWriter(ExcelWriter): supported_extensions = (".ods",) def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs): + from odf.opendocument import OpenDocumentSpreadsheet + engine_kwargs["engine"] = engine if mode == "a": @@ -46,6 +36,9 @@ def save(self): def write_cells( self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None ): + from odf.table import Table, TableCell, TableRow + from odf.text import P + # Write the frame cells using odf # assert startrow == 0 # assert startcol == 0 @@ -90,6 +83,8 @@ def _make_table_cell_attributes(self, cell): return attributes def _make_table_cell(self, cell): + from odf.table import TableCell + attributes = self._make_table_cell_attributes(cell) val, fmt = self._value_with_fmt(cell.val) pvalue = value = val @@ -131,6 +126,13 @@ def _make_table_cell(self, cell): ) def _process_style(self, style): + from odf.style import ( + ParagraphProperties, + Style, + TableCellProperties, + TextProperties, + ) + if style is None: return None style_key = json.dumps(style) @@ -164,6 +166,14 @@ def _process_style(self, style): return name def _create_freeze_panes(self, sheet_name, freeze_panes): + from odf.config import ( + ConfigItem, + ConfigItemMapEntry, + ConfigItemMapIndexed, + ConfigItemMapNamed, + ConfigItemSet, + ) + config_item_set = ConfigItemSet(name="ooo:view-settings") self.book.settings.addElement(config_item_set) From defb5c126571d984f4d0129dc70e8e1dd7e19065 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 22 Mar 2020 23:19:01 +0100 Subject: [PATCH 19/40] Move skip into test --- pandas/tests/io/excel/test_writers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 2ada04179daae..e337b048e2850 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -298,7 +298,6 @@ def test_multiindex_interval_datetimes(self, ext): @td.skip_if_no("xlrd") -@td.skip_if_no("odf") @pytest.mark.parametrize( "engine,ext", [ @@ -306,7 +305,7 @@ def test_multiindex_interval_datetimes(self, ext): pytest.param("openpyxl", ".xlsm", marks=td.skip_if_no("openpyxl")), pytest.param("xlwt", ".xls", marks=td.skip_if_no("xlwt")), pytest.param("xlsxwriter", ".xlsx", marks=td.skip_if_no("xlsxwriter")), - pytest.param("odf", ".ods"), + pytest.param("odf", ".ods", marks=td.skip_if_no("odf")), ], ) @pytest.mark.usefixtures("set_engine") From af530a453da72d503ceac34586c1fba2c51c2eab Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 24 Mar 2020 21:38:44 +0100 Subject: [PATCH 20/40] mypy fix --- pandas/io/excel/_odswriter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 1a9d80da7518f..61ec56c6392d6 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -1,6 +1,6 @@ from collections import defaultdict import datetime -from typing import DefaultDict +from typing import DefaultDict, Dict import pandas._libs.json as json @@ -23,7 +23,7 @@ def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs): super().__init__(path, mode=mode, **engine_kwargs) self.book = OpenDocumentSpreadsheet() - self._style_dict = {} + self._style_dict: Dict[str, str] = {} def save(self): """ From febd3ba0a6e0d6af03bb244982f6eb5f0c916c2c Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 24 Mar 2020 21:47:53 +0100 Subject: [PATCH 21/40] Add whatsnew entry --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 60aa1759958f6..64dd206636cbe 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -316,6 +316,7 @@ Other enhancements - :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`). - :meth:`Dataframe.cov` and :meth:`Series.cov` now support a new parameter ddof to support delta degrees of freedom as in the corresponding numpy methods (:issue:`34611`). - :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list of dict to change only some specific columns' width (:issue:`28917`). +- :meth:`DataFrame.to_excel` can now also generate OpenOffice spreadsheet (.ods) files (:issue:`27222`) .. --------------------------------------------------------------------------- From dac7cb67c9dc3456f441932e203d4aaa0304c00a Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 24 Mar 2020 21:56:31 +0100 Subject: [PATCH 22/40] Simplify datetime formatting by removing useless check --- pandas/io/excel/_odswriter.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 61ec56c6392d6..ec39314c30ce9 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -92,12 +92,8 @@ def _make_table_cell(self, cell): value = str(val).lower() pvalue = str(val).upper() if isinstance(val, datetime.datetime): - if val.time(): - value = val.isoformat() - pvalue = val.strftime("%c") - else: - value = val.strftime("%Y-%m-%d") - pvalue = val.strftime("%x") + value = val.isoformat() + pvalue = val.strftime("%c") return ( pvalue, TableCell(valuetype="date", datevalue=value, attributes=attributes), From d64fb96fbcfea9db760ea16ec61cee76a83b60b3 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 24 Mar 2020 22:14:00 +0100 Subject: [PATCH 23/40] Add support for startrow and startcol arguments --- pandas/io/excel/_odswriter.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index ec39314c30ce9..61a2f6d918ef5 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -36,12 +36,12 @@ def save(self): def write_cells( self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None ): + """ + Write the frame cells using odf + """ from odf.table import Table, TableCell, TableRow from odf.text import P - # Write the frame cells using odf - # assert startrow == 0 - # assert startcol == 0 sheet_name = self._get_sheet_name(sheet_name) if sheet_name in self.sheets: @@ -53,10 +53,18 @@ def write_cells( if _validate_freeze_panes(freeze_panes): self._create_freeze_panes(sheet_name, freeze_panes) + for _ in range(startrow): + wks.addElement(TableRow()) + rows: DefaultDict = defaultdict(TableRow) col_count: DefaultDict = defaultdict(int) for cell in sorted(cells, key=lambda cell: (cell.row, cell.col)): + # only add empty cells if the row is still empty + if not col_count[cell.row]: + for _ in range(startcol): + rows[cell.row].addElement(TableCell()) + # fill with empty cells if needed for _ in range(cell.col - col_count[cell.row]): rows[cell.row].addElement(TableCell()) From 54fbbf81421c64d97ede720235189c16d32408af Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sat, 28 Mar 2020 15:59:55 +0100 Subject: [PATCH 24/40] Add automatic OpenDocument Spreadsheet recognition to ExcelFile class --- pandas/io/excel/_base.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 371fbaeccdd87..7755dc7d9b19b 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1,6 +1,6 @@ import abc import datetime -from io import BytesIO +from io import BytesIO, IOBase import os from textwrap import fill @@ -778,6 +778,17 @@ def close(self): return self.save() +def _is_ods_stream(stream): + stream.seek(0) + is_ods = False + if stream.read(4) == b"PK\003\004": + stream.seek(30) + is_ods = stream.read(54) == b"mimetype" \ + b"application/vnd.oasis.opendocument.spreadsheet" + stream.seek(0) + return is_ods + + class ExcelFile: """ Class for parsing tabular excel sheets into DataFrame objects. @@ -816,9 +827,12 @@ class ExcelFile: def __init__(self, path_or_io, engine=None): if engine is None: engine = "xlrd" - if isinstance(path_or_io, str): - ext = os.path.splitext(path_or_io)[-1][1:] - if ext == "ods": + if isinstance(path_or_io, IOBase): + if _is_ods_stream(path_or_io): + engine = "odf" + else: + ext = os.path.splitext(str(path_or_io))[-1] + if ext == ".ods": engine = "odf" if engine not in self._engines: raise ValueError(f"Unknown engine: {engine}") From d6e48fbecd9b37f202bb2e32b37038ed637a2375 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sat, 28 Mar 2020 16:00:43 +0100 Subject: [PATCH 25/40] Improve import dependency parameterization --- pandas/tests/io/excel/test_writers.py | 49 ++++++++++----------------- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index e337b048e2850..3143d3a997ba1 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -48,12 +48,17 @@ def set_engine(engine, ext): set_option(option_name, prev_engine) # Roll back option change -@td.skip_if_no("xlrd") -@td.skip_if_no("odf") -@pytest.mark.parametrize("ext", [".xls", ".xlsx", ".xlsm", ".ods"]) +@pytest.mark.parametrize( + "ext", + [ + pytest.param(".xlsx", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), + pytest.param(".xlsm", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), + pytest.param(".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")]), + pytest.param(".xlsx", marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")]), + pytest.param(".ods", marks=td.skip_if_no("odf")), + ], +) class TestRoundTrip: - @td.skip_if_no("xlwt") - @td.skip_if_no("openpyxl") @pytest.mark.parametrize( "header,expected", [(None, DataFrame([np.nan] * 4)), (0, DataFrame({"Unnamed: 0": [np.nan] * 3}))], @@ -71,8 +76,6 @@ def test_read_one_empty_col_no_header(self, ext, header, expected): tm.assert_frame_equal(result, expected) - @td.skip_if_no("xlwt") - @td.skip_if_no("openpyxl") @pytest.mark.parametrize( "header,expected", [(None, DataFrame([0] + [np.nan] * 4)), (0, DataFrame([np.nan] * 4))], @@ -89,8 +92,6 @@ def test_read_one_empty_col_with_header(self, ext, header, expected): tm.assert_frame_equal(result, expected) - @td.skip_if_no("openpyxl") - @td.skip_if_no("xlwt") def test_set_column_names_in_parameter(self, ext): # GH 12870 : pass down column names associated with # keyword argument names @@ -117,8 +118,6 @@ def test_set_column_names_in_parameter(self, ext): tm.assert_frame_equal(xlsdf_no_head, refdf) tm.assert_frame_equal(xlsdf_with_head, refdf) - @td.skip_if_no("xlwt") - @td.skip_if_no("openpyxl") def test_creating_and_reading_multiple_sheets(self, ext): # see gh-9450 # @@ -143,7 +142,6 @@ def tdf(col_sheet_name): for s in sheets: tm.assert_frame_equal(dfs[s], dfs_returned[s]) - @td.skip_if_no("xlsxwriter") def test_read_excel_multiindex_empty_level(self, ext): # see gh-12453 with tm.ensure_clean(ext) as path: @@ -191,7 +189,6 @@ def test_read_excel_multiindex_empty_level(self, ext): actual = pd.read_excel(path, header=[0, 1], index_col=0) tm.assert_frame_equal(actual, expected) - @td.skip_if_no("xlsxwriter") @pytest.mark.parametrize("c_idx_names", [True, False]) @pytest.mark.parametrize("r_idx_names", [True, False]) @pytest.mark.parametrize("c_idx_levels", [1, 3]) @@ -241,8 +238,6 @@ def test_excel_multindex_roundtrip( ) tm.assert_frame_equal(df, act, check_names=check_names) - @td.skip_if_no("xlwt") - @td.skip_if_no("openpyxl") def test_read_excel_parse_dates(self, ext): # see gh-11544, gh-12051 df = DataFrame( @@ -297,14 +292,13 @@ def test_multiindex_interval_datetimes(self, ext): tm.assert_frame_equal(result, expected) -@td.skip_if_no("xlrd") @pytest.mark.parametrize( "engine,ext", [ - pytest.param("openpyxl", ".xlsx", marks=td.skip_if_no("openpyxl")), - pytest.param("openpyxl", ".xlsm", marks=td.skip_if_no("openpyxl")), - pytest.param("xlwt", ".xls", marks=td.skip_if_no("xlwt")), - pytest.param("xlsxwriter", ".xlsx", marks=td.skip_if_no("xlsxwriter")), + pytest.param("openpyxl", ".xlsx", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), + pytest.param("openpyxl", ".xlsm", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), + pytest.param("xlwt", ".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")]), + pytest.param("xlsxwriter", ".xlsx", marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")]), pytest.param("odf", ".ods", marks=td.skip_if_no("odf")), ], ) @@ -329,8 +323,6 @@ def test_excel_sheet_size(self, path): col_df.to_excel(path) def test_excel_sheet_by_name_raise(self, path, engine): - import xlrd - gt = DataFrame(np.random.randn(10, 2)) gt.to_excel(path) @@ -344,6 +336,7 @@ def test_excel_sheet_by_name_raise(self, path, engine): with pytest.raises(ValueError, match=msg): pd.read_excel(xl, "0") else: + import xlrd msg = "No sheet named <'0'>" with pytest.raises(xlrd.XLRDError, match=msg): pd.read_excel(xl, sheet_name="0") @@ -1200,9 +1193,7 @@ def test_bytes_io(self, engine): writer.save() bio.seek(0) - if engine != "odf": - engine = None - reread_df = pd.read_excel(bio, engine=engine, index_col=0) + reread_df = pd.read_excel(bio, index_col=0) tm.assert_frame_equal(df, reread_df) def test_write_lists_dict(self, path): @@ -1254,9 +1245,7 @@ def test_path_path_lib(self, engine, ext): df = tm.makeDataFrame() writer = partial(df.to_excel, engine=engine) - if engine != "odf": - engine = None - reader = partial(pd.read_excel, engine=engine, index_col=0) + reader = partial(pd.read_excel, index_col=0) result = tm.round_trip_pathlib(writer, reader, path=f"foo{ext}") tm.assert_frame_equal(result, df) @@ -1264,9 +1253,7 @@ def test_path_local_path(self, engine, ext): df = tm.makeDataFrame() writer = partial(df.to_excel, engine=engine) - if engine != "odf": - engine = None - reader = partial(pd.read_excel, engine=engine, index_col=0) + reader = partial(pd.read_excel, index_col=0) result = tm.round_trip_localpath(writer, reader, path=f"foo{ext}") tm.assert_frame_equal(result, df) From 635dd84524fb40b73e65bd463c26577c4be02102 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sat, 28 Mar 2020 16:11:24 +0100 Subject: [PATCH 26/40] Reformatting fixes (black) --- pandas/io/excel/_base.py | 4 +++- pandas/tests/io/excel/test_writers.py | 27 ++++++++++++++++++++++----- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 7755dc7d9b19b..76f87e937e395 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -783,8 +783,10 @@ def _is_ods_stream(stream): is_ods = False if stream.read(4) == b"PK\003\004": stream.seek(30) - is_ods = stream.read(54) == b"mimetype" \ + is_ods = ( + stream.read(54) == b"mimetype" b"application/vnd.oasis.opendocument.spreadsheet" + ) stream.seek(0) return is_ods diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 3143d3a997ba1..e3ee53b63e102 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -54,7 +54,9 @@ def set_engine(engine, ext): pytest.param(".xlsx", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), pytest.param(".xlsm", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), pytest.param(".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")]), - pytest.param(".xlsx", marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")]), + pytest.param( + ".xlsx", marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")] + ), pytest.param(".ods", marks=td.skip_if_no("odf")), ], ) @@ -295,10 +297,24 @@ def test_multiindex_interval_datetimes(self, ext): @pytest.mark.parametrize( "engine,ext", [ - pytest.param("openpyxl", ".xlsx", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), - pytest.param("openpyxl", ".xlsm", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]), - pytest.param("xlwt", ".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")]), - pytest.param("xlsxwriter", ".xlsx", marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")]), + pytest.param( + "openpyxl", + ".xlsx", + marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")], + ), + pytest.param( + "openpyxl", + ".xlsm", + marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")], + ), + pytest.param( + "xlwt", ".xls", marks=[td.skip_if_no("xlwt"), td.skip_if_no("xlrd")] + ), + pytest.param( + "xlsxwriter", + ".xlsx", + marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")], + ), pytest.param("odf", ".ods", marks=td.skip_if_no("odf")), ], ) @@ -337,6 +353,7 @@ def test_excel_sheet_by_name_raise(self, path, engine): pd.read_excel(xl, "0") else: import xlrd + msg = "No sheet named <'0'>" with pytest.raises(xlrd.XLRDError, match=msg): pd.read_excel(xl, sheet_name="0") From 2de77552d73b5a25416410ab960d47e6d44c8518 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 9 Jun 2020 21:51:16 +0200 Subject: [PATCH 27/40] Rename parameter path_or_io to path_or_buffer --- pandas/io/excel/_base.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 76f87e937e395..cc9a65eb4b0af 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -799,8 +799,8 @@ class ExcelFile: Parameters ---------- - path_or_io : str, path object (pathlib.Path or py._path.local.LocalPath), - a file-like object, xlrd workbook or openpypl workbook. + path_or_buffer : str, path object (pathlib.Path or py._path.local.LocalPath), + a file-like object, xlrd workbook or openpypl workbook. If a string or path object, expected to be a path to a .xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file. engine : str, default None @@ -826,14 +826,14 @@ class ExcelFile: "pyxlsb": _PyxlsbReader, } - def __init__(self, path_or_io, engine=None): + def __init__(self, path_or_buffer, engine=None): if engine is None: engine = "xlrd" - if isinstance(path_or_io, IOBase): - if _is_ods_stream(path_or_io): + if isinstance(path_or_buffer, IOBase): + if _is_ods_stream(path_or_buffer): engine = "odf" else: - ext = os.path.splitext(str(path_or_io))[-1] + ext = os.path.splitext(str(path_or_buffer))[-1] if ext == ".ods": engine = "odf" if engine not in self._engines: @@ -842,9 +842,9 @@ def __init__(self, path_or_io, engine=None): self.engine = engine # Could be a str, ExcelFile, Book, etc. - self.io = path_or_io + self.io = path_or_buffer # Always a string - self._io = stringify_path(path_or_io) + self._io = stringify_path(path_or_buffer) self._reader = self._engines[engine](self._io) From 19f0a5ca9475000bb02cf31c053f2c188a0be106 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 9 Jun 2020 22:34:25 +0200 Subject: [PATCH 28/40] Add doc-strings and type annotations --- pandas/io/excel/_odswriter.py | 67 +++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 61a2f6d918ef5..d4f6b98508506 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -1,6 +1,6 @@ from collections import defaultdict import datetime -from typing import DefaultDict, Dict +from typing import Any, DefaultDict, Dict, List, Tuple import pandas._libs.json as json @@ -12,7 +12,8 @@ class _ODSWriter(ExcelWriter): engine = "odf" supported_extensions = (".ods",) - def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs): + def __init__(self, path: str, engine: Dict = None, mode: str = "w", + **engine_kwargs): from odf.opendocument import OpenDocumentSpreadsheet engine_kwargs["engine"] = engine @@ -25,17 +26,18 @@ def __init__(self, path, engine=None, encoding=None, mode="w", **engine_kwargs): self.book = OpenDocumentSpreadsheet() self._style_dict: Dict[str, str] = {} - def save(self): + def save(self) -> None: """ Save workbook to disk. """ for sheet in self.sheets.values(): self.book.spreadsheet.addElement(sheet) - return self.book.save(self.path) + self.book.save(self.path) def write_cells( - self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None - ): + self, cells, sheet_name: str = None, startrow: int = 0, startcol: int = 0, + freeze_panes: List = None + ) -> None: """ Write the frame cells using odf """ @@ -80,7 +82,19 @@ def write_cells( for row_nr in range(max(rows.keys()) + 1): wks.addElement(rows[row_nr]) - def _make_table_cell_attributes(self, cell): + def _make_table_cell_attributes(self, cell) -> Dict[str, object]: + """Convert cell attributes to OpenDocument attributes + + Parameters + ---------- + cell : ExcelCell + Spreadsheet cell data + + Returns + ------- + attributes : Dict[str, object] + Dictionary with attributes and attribute values + """ attributes = {} style_name = self._process_style(cell.style) if style_name is not None: @@ -90,7 +104,19 @@ def _make_table_cell_attributes(self, cell): attributes["numbercolumnsspanned"] = cell.mergeend return attributes - def _make_table_cell(self, cell): + def _make_table_cell(self, cell) -> Tuple[str, object]: + """Convert cell data to an OpenDocument spreadsheet cell + + Parameters + ---------- + cell : ExcelCell + Spreadsheet cell data + + Returns + ------- + pvalue, cell : Tuple[str, object] + Display value, Cell value + """ from odf.table import TableCell attributes = self._make_table_cell_attributes(cell) @@ -129,7 +155,19 @@ def _make_table_cell(self, cell): ), ) - def _process_style(self, style): + def _process_style(self, style: Dict[str, Any]) -> str: + """Convert a style dictionary to a OpenDocument style sheet + + Parameters + ---------- + style : Dict + Style dictionary + + Returns + ------- + style_key : str + Unique style key for for later reference in sheet + """ from odf.style import ( ParagraphProperties, Style, @@ -169,7 +207,16 @@ def _process_style(self, style): self.book.styles.addElement(odf_style) return name - def _create_freeze_panes(self, sheet_name, freeze_panes): + def _create_freeze_panes(self, sheet_name: str, freeze_panes: List[int]) -> None: + """Create freeze panes in the sheet + + Parameters + ---------- + sheet_name : str + Name of the spreadsheet + freeze_panes : list + Freeze pane location x and y + """ from odf.config import ( ConfigItem, ConfigItemMapEntry, From 89f742f0a3539219b6e88eae53314068a7c0dc5c Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 9 Jun 2020 22:45:00 +0200 Subject: [PATCH 29/40] Update whatsnew according to suggestion by jreback --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 64dd206636cbe..9f550035dd7e6 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -316,7 +316,7 @@ Other enhancements - :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`). - :meth:`Dataframe.cov` and :meth:`Series.cov` now support a new parameter ddof to support delta degrees of freedom as in the corresponding numpy methods (:issue:`34611`). - :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list of dict to change only some specific columns' width (:issue:`28917`). -- :meth:`DataFrame.to_excel` can now also generate OpenOffice spreadsheet (.ods) files (:issue:`27222`) +- :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`) .. --------------------------------------------------------------------------- From 171fc61be0a66beffac17d35a9263a42d0f8ea03 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 9 Jun 2020 23:40:48 +0200 Subject: [PATCH 30/40] Black reformatting --- pandas/io/excel/_odswriter.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index d4f6b98508506..2bd346db46ebd 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -12,8 +12,9 @@ class _ODSWriter(ExcelWriter): engine = "odf" supported_extensions = (".ods",) - def __init__(self, path: str, engine: Dict = None, mode: str = "w", - **engine_kwargs): + def __init__( + self, path: str, engine: Dict = None, mode: str = "w", **engine_kwargs + ): from odf.opendocument import OpenDocumentSpreadsheet engine_kwargs["engine"] = engine @@ -35,8 +36,12 @@ def save(self) -> None: self.book.save(self.path) def write_cells( - self, cells, sheet_name: str = None, startrow: int = 0, startcol: int = 0, - freeze_panes: List = None + self, + cells, + sheet_name: str = None, + startrow: int = 0, + startcol: int = 0, + freeze_panes: List = None, ) -> None: """ Write the frame cells using odf From 0d15a205026a7c5e0422a72ad5965b71881a6b06 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 9 Jun 2020 23:57:15 +0200 Subject: [PATCH 31/40] Fix some type annotations --- pandas/io/excel/_odswriter.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 2bd346db46ebd..0785c9151eac8 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -1,6 +1,6 @@ from collections import defaultdict import datetime -from typing import Any, DefaultDict, Dict, List, Tuple +from typing import Any, DefaultDict, Dict, List, Optional, Tuple import pandas._libs.json as json @@ -13,7 +13,7 @@ class _ODSWriter(ExcelWriter): supported_extensions = (".ods",) def __init__( - self, path: str, engine: Dict = None, mode: str = "w", **engine_kwargs + self, path: str, engine: Optional[str] = None, mode: str = "w", **engine_kwargs ): from odf.opendocument import OpenDocumentSpreadsheet @@ -38,10 +38,10 @@ def save(self) -> None: def write_cells( self, cells, - sheet_name: str = None, + sheet_name: Optional[str] = None, startrow: int = 0, startcol: int = 0, - freeze_panes: List = None, + freeze_panes: Optional[List] = None, ) -> None: """ Write the frame cells using odf @@ -100,7 +100,7 @@ def _make_table_cell_attributes(self, cell) -> Dict[str, object]: attributes : Dict[str, object] Dictionary with attributes and attribute values """ - attributes = {} + attributes: Dict[str, object] = {} style_name = self._process_style(cell.style) if style_name is not None: attributes["stylename"] = style_name From 336c2318d6f62ff222da005071606723c43af092 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 14 Jun 2020 22:37:03 +0200 Subject: [PATCH 32/40] Some type fixes --- pandas/io/excel/_odswriter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 0785c9151eac8..3e065e9ce5f4e 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -12,6 +12,8 @@ class _ODSWriter(ExcelWriter): engine = "odf" supported_extensions = (".ods",) + from odf.table import Table, TableCell, TableRow + def __init__( self, path: str, engine: Optional[str] = None, mode: str = "w", **engine_kwargs ): @@ -24,7 +26,7 @@ def __init__( super().__init__(path, mode=mode, **engine_kwargs) - self.book = OpenDocumentSpreadsheet() + self.book: OpenDocumentSpreadsheet = OpenDocumentSpreadsheet() self._style_dict: Dict[str, str] = {} def save(self) -> None: @@ -46,7 +48,6 @@ def write_cells( """ Write the frame cells using odf """ - from odf.table import Table, TableCell, TableRow from odf.text import P sheet_name = self._get_sheet_name(sheet_name) @@ -109,7 +110,7 @@ def _make_table_cell_attributes(self, cell) -> Dict[str, object]: attributes["numbercolumnsspanned"] = cell.mergeend return attributes - def _make_table_cell(self, cell) -> Tuple[str, object]: + def _make_table_cell(self, cell) -> Tuple[str, TableCell]: """Convert cell data to an OpenDocument spreadsheet cell Parameters @@ -122,7 +123,6 @@ def _make_table_cell(self, cell) -> Tuple[str, object]: pvalue, cell : Tuple[str, object] Display value, Cell value """ - from odf.table import TableCell attributes = self._make_table_cell_attributes(cell) val, fmt = self._value_with_fmt(cell.val) From 3edfbd8806149f181f946f0e5eb7a6aa072e5e72 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 14 Jun 2020 22:47:12 +0200 Subject: [PATCH 33/40] Revert some of the typing fixes as they break some of the builds --- pandas/io/excel/_odswriter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 3e065e9ce5f4e..97068c8b59dd4 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -12,8 +12,6 @@ class _ODSWriter(ExcelWriter): engine = "odf" supported_extensions = (".ods",) - from odf.table import Table, TableCell, TableRow - def __init__( self, path: str, engine: Optional[str] = None, mode: str = "w", **engine_kwargs ): @@ -48,6 +46,7 @@ def write_cells( """ Write the frame cells using odf """ + from odf.table import Table, TableCell, TableRow from odf.text import P sheet_name = self._get_sheet_name(sheet_name) @@ -110,7 +109,7 @@ def _make_table_cell_attributes(self, cell) -> Dict[str, object]: attributes["numbercolumnsspanned"] = cell.mergeend return attributes - def _make_table_cell(self, cell) -> Tuple[str, TableCell]: + def _make_table_cell(self, cell) -> Tuple[str, object]: """Convert cell data to an OpenDocument spreadsheet cell Parameters @@ -123,6 +122,7 @@ def _make_table_cell(self, cell) -> Tuple[str, TableCell]: pvalue, cell : Tuple[str, object] Display value, Cell value """ + from odf.table import TableCell attributes = self._make_table_cell_attributes(cell) val, fmt = self._value_with_fmt(cell.val) From 97707b8ea98befdcc5470fb28d7be7c6b797aed7 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 14 Jun 2020 23:15:09 +0200 Subject: [PATCH 34/40] More mypy typing fixes --- pandas/io/excel/_odswriter.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index 97068c8b59dd4..c5aaf0ee74f14 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -50,6 +50,7 @@ def write_cells( from odf.text import P sheet_name = self._get_sheet_name(sheet_name) + assert sheet_name is not None if sheet_name in self.sheets: wks = self.sheets[sheet_name] @@ -58,6 +59,7 @@ def write_cells( self.sheets[sheet_name] = wks if _validate_freeze_panes(freeze_panes): + assert freeze_panes is not None self._create_freeze_panes(sheet_name, freeze_panes) for _ in range(startrow): @@ -87,7 +89,7 @@ def write_cells( for row_nr in range(max(rows.keys()) + 1): wks.addElement(rows[row_nr]) - def _make_table_cell_attributes(self, cell) -> Dict[str, object]: + def _make_table_cell_attributes(self, cell) -> Dict[str, Any]: """Convert cell attributes to OpenDocument attributes Parameters From 45467d289cb5e471e426f3a8ecf55431adb271a8 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 14 Jun 2020 23:24:08 +0200 Subject: [PATCH 35/40] Add more typing info --- pandas/io/excel/_odswriter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index c5aaf0ee74f14..ffcbb6b87708f 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -1,6 +1,6 @@ from collections import defaultdict import datetime -from typing import Any, DefaultDict, Dict, List, Optional, Tuple +from typing import Any, DefaultDict, Dict, List, Optional, Tuple, Union import pandas._libs.json as json @@ -89,7 +89,7 @@ def write_cells( for row_nr in range(max(rows.keys()) + 1): wks.addElement(rows[row_nr]) - def _make_table_cell_attributes(self, cell) -> Dict[str, Any]: + def _make_table_cell_attributes(self, cell) -> Dict[str, Union[int, str]]: """Convert cell attributes to OpenDocument attributes Parameters @@ -99,10 +99,10 @@ def _make_table_cell_attributes(self, cell) -> Dict[str, Any]: Returns ------- - attributes : Dict[str, object] + attributes : Dict[str, Union[int, str]] Dictionary with attributes and attribute values """ - attributes: Dict[str, object] = {} + attributes: Dict[str, Union[int, str]] = {} style_name = self._process_style(cell.style) if style_name is not None: attributes["stylename"] = style_name From b14847d3b411aa3a663daef94c8322fca4c3167d Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Sun, 14 Jun 2020 23:47:17 +0200 Subject: [PATCH 36/40] And yet more typing fixes --- pandas/io/excel/_odswriter.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index ffcbb6b87708f..0131240f99cf6 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -6,6 +6,7 @@ from pandas.io.excel._base import ExcelWriter from pandas.io.excel._util import _validate_freeze_panes +from pandas.io.formats.excel import ExcelCell class _ODSWriter(ExcelWriter): @@ -37,7 +38,7 @@ def save(self) -> None: def write_cells( self, - cells, + cells: List[ExcelCell], sheet_name: Optional[str] = None, startrow: int = 0, startcol: int = 0, @@ -111,7 +112,7 @@ def _make_table_cell_attributes(self, cell) -> Dict[str, Union[int, str]]: attributes["numbercolumnsspanned"] = cell.mergeend return attributes - def _make_table_cell(self, cell) -> Tuple[str, object]: + def _make_table_cell(self, cell) -> Tuple[str, Any]: """Convert cell data to an OpenDocument spreadsheet cell Parameters @@ -121,7 +122,7 @@ def _make_table_cell(self, cell) -> Tuple[str, object]: Returns ------- - pvalue, cell : Tuple[str, object] + pvalue, cell : Tuple[str, TableCell] Display value, Cell value """ from odf.table import TableCell From d4d3a7c2075e756dc6aa8e2353e29debc1f13e44 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Mon, 15 Jun 2020 00:05:24 +0200 Subject: [PATCH 37/40] Add doc-string and type info to _is_ods_stream --- pandas/io/excel/_base.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index cc9a65eb4b0af..27d5f73be7813 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1,8 +1,9 @@ import abc import datetime -from io import BytesIO, IOBase +from io import BytesIO, BufferedIOBase, RawIOBase import os from textwrap import fill +from typing import Union from pandas._config import config @@ -778,7 +779,22 @@ def close(self): return self.save() -def _is_ods_stream(stream): +def _is_ods_stream(stream: Union[BufferedIOBase, RawIOBase]) -> bool: + """ + Check if the stream is an OpenDocument Spreadsheet (.ods) file + + It uses magic values inside the stream + + Parameters + ---------- + stream : Union[BufferedIOBase, RawIOBase] + IO stream with data which might be an ODS file + + Returns + ------- + is_ods : bool + Boolean indication that this is indeed an ODS file or not + """ stream.seek(0) is_ods = False if stream.read(4) == b"PK\003\004": @@ -829,7 +845,7 @@ class ExcelFile: def __init__(self, path_or_buffer, engine=None): if engine is None: engine = "xlrd" - if isinstance(path_or_buffer, IOBase): + if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)): if _is_ods_stream(path_or_buffer): engine = "odf" else: From f82f4d4bf8eb0fabb307ccacaa532196106a2781 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 16 Jun 2020 20:43:36 +0200 Subject: [PATCH 38/40] Fix import order --- pandas/io/excel/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 27d5f73be7813..4fa4f158e9c3c 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1,6 +1,6 @@ import abc import datetime -from io import BytesIO, BufferedIOBase, RawIOBase +from io import BufferedIOBase, BytesIO, RawIOBase import os from textwrap import fill from typing import Union From f20e2cc27aa3dad2ae1401b58d9499de6232a857 Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Tue, 16 Jun 2020 21:06:08 +0200 Subject: [PATCH 39/40] Add test to check exception when writing in append mode --- pandas/tests/io/excel/test_odswriter.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 pandas/tests/io/excel/test_odswriter.py diff --git a/pandas/tests/io/excel/test_odswriter.py b/pandas/tests/io/excel/test_odswriter.py new file mode 100644 index 0000000000000..b50c641ebf0c0 --- /dev/null +++ b/pandas/tests/io/excel/test_odswriter.py @@ -0,0 +1,17 @@ +import pytest + +import pandas._testing as tm + +from pandas.io.excel import ExcelWriter + +odf = pytest.importorskip("odf") + +pytestmark = pytest.mark.parametrize("ext", [".ods"]) + + +def test_write_append_mode_raises(ext): + msg = "Append mode is not supported with odf!" + + with tm.ensure_clean(ext) as f: + with pytest.raises(ValueError, match=msg): + ExcelWriter(f, engine="odf", mode="a") From 9e2684fd69f51ca7fa8b0b0f98d708054273ccaa Mon Sep 17 00:00:00 2001 From: Robert de Vries Date: Wed, 24 Jun 2020 14:33:17 +0200 Subject: [PATCH 40/40] Add whatsnew entry for extra bug fix in read_excel for 0.0 values in odf files --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9f550035dd7e6..ec3af524083c3 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1019,6 +1019,7 @@ I/O - Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`) - Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with difference dtypes when reading data using an iterator. (:issue:`31544`) - :meth:`HDFStore.keys` has now an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`) +- Bug in :meth:`read_excel` for ODS files removes 0.0 values (:issue:`27222`) Plotting ^^^^^^^^