Skip to content

Commit e8dcaf9

Browse files
ENH: Add ods writer (#32911)
1 parent 2786cda commit e8dcaf9

File tree

9 files changed

+405
-44
lines changed

9 files changed

+405
-44
lines changed

doc/source/whatsnew/v1.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ Other enhancements
316316
- :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
317317
- :meth:`DataFrame.cov` and :meth:`Series.cov` now support a new parameter ``ddof`` to support delta degrees of freedom as in the corresponding numpy methods (:issue:`34611`).
318318
- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`).
319+
- :meth:`DataFrame.to_excel` can now also write OpenDocument spreadsheet (.ods) files (:issue:`27222`).
319320

320321
.. ---------------------------------------------------------------------------
321322
@@ -1018,6 +1019,7 @@ I/O
10181019
- Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the ``%`` character and no parameters were present (:issue:`34211`)
10191020
- Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with different dtypes when reading data using an iterator. (:issue:`31544`)
10201021
- :meth:`HDFStore.keys` now has an optional ``include`` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`)
1022+
- Bug in :meth:`read_excel` where 0.0 values were removed when reading ODS files (:issue:`27222`)
10211023

10221024
Plotting
10231025
^^^^^^^^

pandas/core/config_init.py

+10
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,7 @@ def use_inf_as_na_cb(key):
553553
_xls_options = ["xlwt"]
554554
_xlsm_options = ["openpyxl"]
555555
_xlsx_options = ["openpyxl", "xlsxwriter"]
556+
_ods_options = ["odf"]
556557

557558

558559
with cf.config_prefix("io.excel.xls"):
@@ -581,6 +582,15 @@ def use_inf_as_na_cb(key):
581582
)
582583

583584

585+
with cf.config_prefix("io.excel.ods"):
586+
cf.register_option(
587+
"writer",
588+
"auto",
589+
writer_engine_doc.format(ext="ods", others=", ".join(_ods_options)),
590+
validator=str,
591+
)
592+
593+
584594
# Set up the io.parquet specific configuration.
585595
parquet_engine_doc = """
586596
: string

pandas/io/excel/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from pandas.io.excel._base import ExcelFile, ExcelWriter, read_excel
2+
from pandas.io.excel._odswriter import _ODSWriter
23
from pandas.io.excel._openpyxl import _OpenpyxlWriter
34
from pandas.io.excel._util import register_writer
45
from pandas.io.excel._xlsxwriter import _XlsxWriter
@@ -14,3 +15,6 @@
1415

1516

1617
register_writer(_XlsxWriter)
18+
19+
20+
register_writer(_ODSWriter)

pandas/io/excel/_base.py

+45-12
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import abc
22
import datetime
3-
from io import BytesIO
3+
from io import BufferedIOBase, BytesIO, RawIOBase
44
import os
55
from textwrap import fill
6+
from typing import Union
67

78
from pandas._config import config
89

@@ -533,13 +534,13 @@ class ExcelWriter(metaclass=abc.ABCMeta):
533534
"""
534535
Class for writing DataFrame objects into excel sheets.
535536
536-
Default is to use xlwt for xls, openpyxl for xlsx.
537+
Default is to use xlwt for xls, openpyxl for xlsx, odf for ods.
537538
See DataFrame.to_excel for typical usage.
538539
539540
Parameters
540541
----------
541542
path : str
542-
Path to xls or xlsx file.
543+
Path to xls or xlsx or ods file.
543544
engine : str (optional)
544545
Engine to use for writing. If None, defaults to
545546
``io.excel.<extension>.writer``. NOTE: can only be passed as a keyword
@@ -692,10 +693,7 @@ def __init__(
692693
# validate that this engine can handle the extension
693694
if isinstance(path, str):
694695
ext = os.path.splitext(path)[-1]
695-
else:
696-
ext = "xls" if engine == "xlwt" else "xlsx"
697-
698-
self.check_extension(ext)
696+
self.check_extension(ext)
699697

700698
self.path = path
701699
self.sheets = {}
@@ -781,6 +779,34 @@ def close(self):
781779
return self.save()
782780

783781

782+
def _is_ods_stream(stream: Union[BufferedIOBase, RawIOBase]) -> bool:
    """
    Check if the stream is an OpenDocument Spreadsheet (.ods) file

    It uses magic values inside the stream: the ZIP local-file-header
    signature at offset 0, followed at offset 30 by the entry name
    ``mimetype`` and the ODS media type. This is the layout ODF packages
    are expected to have (``mimetype`` stored first and uncompressed); see
    the ZIP and ODF package specifications for details.

    The stream is always rewound to position 0 before returning, also when
    reading fails part-way through.

    Parameters
    ----------
    stream : Union[BufferedIOBase, RawIOBase]
        IO stream with data which might be an ODS file. It must be seekable.

    Returns
    -------
    is_ods : bool
        Boolean indication that this is indeed an ODS file or not
    """
    zip_signature = b"PK\003\004"
    # The fixed part of a ZIP local file header is 30 bytes long; the name of
    # the first archive entry starts right after it.
    first_entry_name_offset = 30
    ods_marker = b"mimetype" b"application/vnd.oasis.opendocument.spreadsheet"

    def _read_exactly(size: int) -> bytes:
        # A single read() on a raw stream may legitimately return fewer bytes
        # than requested, so keep reading until `size` bytes or EOF/no data.
        chunks = []
        remaining = size
        while remaining > 0:
            chunk = stream.read(remaining)
            if not chunk:
                break
            chunks.append(chunk)
            remaining -= len(chunk)
        return b"".join(chunks)

    stream.seek(0)
    try:
        if _read_exactly(len(zip_signature)) != zip_signature:
            return False
        stream.seek(first_entry_name_offset)
        return _read_exactly(len(ods_marker)) == ods_marker
    finally:
        # Leave the stream ready for whichever reader consumes it next.
        stream.seek(0)
808+
809+
784810
class ExcelFile:
785811
"""
786812
Class for parsing tabular excel sheets into DataFrame objects.
@@ -789,8 +815,8 @@ class ExcelFile:
789815
790816
Parameters
791817
----------
792-
io : str, path object (pathlib.Path or py._path.local.LocalPath),
793-
a file-like object, xlrd workbook or openpypl workbook.
818+
path_or_buffer : str, path object (pathlib.Path or py._path.local.LocalPath),
819+
a file-like object, xlrd workbook or openpyxl workbook.
794820
If a string or path object, expected to be a path to a
795821
.xls, .xlsx, .xlsb, .xlsm, .odf, .ods, or .odt file.
796822
engine : str, default None
@@ -816,18 +842,25 @@ class ExcelFile:
816842
"pyxlsb": _PyxlsbReader,
817843
}
818844

819-
def __init__(self, io, engine=None):
845+
def __init__(self, path_or_buffer, engine=None):
820846
if engine is None:
821847
engine = "xlrd"
848+
if isinstance(path_or_buffer, (BufferedIOBase, RawIOBase)):
849+
if _is_ods_stream(path_or_buffer):
850+
engine = "odf"
851+
else:
852+
ext = os.path.splitext(str(path_or_buffer))[-1]
853+
if ext == ".ods":
854+
engine = "odf"
822855
if engine not in self._engines:
823856
raise ValueError(f"Unknown engine: {engine}")
824857

825858
self.engine = engine
826859

827860
# Could be a str, ExcelFile, Book, etc.
828-
self.io = io
861+
self.io = path_or_buffer
829862
# Always a string
830-
self._io = stringify_path(io)
863+
self._io = stringify_path(path_or_buffer)
831864

832865
self._reader = self._engines[engine](self._io)
833866

pandas/io/excel/_odfreader.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from typing import List, cast
22

3+
import numpy as np
4+
35
from pandas._typing import FilePathOrBuffer, Scalar
46
from pandas.compat._optional import import_optional_dependency
57

@@ -148,6 +150,9 @@ def _is_empty_row(self, row) -> bool:
148150
def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
149151
from odf.namespaces import OFFICENS
150152

153+
if str(cell) == "#N/A":
154+
return np.nan
155+
151156
cell_type = cell.attributes.get((OFFICENS, "value-type"))
152157
if cell_type == "boolean":
153158
if str(cell) == "TRUE":
@@ -158,10 +163,6 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
158163
elif cell_type == "float":
159164
# GH5394
160165
cell_value = float(cell.attributes.get((OFFICENS, "value")))
161-
162-
if cell_value == 0.0: # NA handling
163-
return str(cell)
164-
165166
if convert_float:
166167
val = int(cell_value)
167168
if val == cell_value:

0 commit comments

Comments
 (0)