pandas-dev · jreback · Apr 6, 2020 · Apr 2, 2020 · Apr 2, 2020 · Apr 2, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -408,6 +408,7 @@ I/O
 - Bug in :meth:`read_csv` was raising a misleading exception on a permissions issue (:issue:`23784`)
 - Bug in :meth:`read_csv` was raising an ``IndexError`` when header=None and 2 extra data columns
 - Bug in :meth:`DataFrame.to_sql` where an ``AttributeError`` was raised when saving an out of bounds date (:issue:`26761`)
+- Bug in :meth:`read_excel` did not correctly handle multiple embedded spaces in OpenDocument text cells. (:issue:`32207`)
 
 Plotting
 ^^^^^^^^

diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
@@ -171,7 +171,7 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
             cell_value = cell.attributes.get((OFFICENS, "value"))
             return float(cell_value)
         elif cell_type == "string":
-            return str(cell)
+            return self._get_cell_string_value(cell)
         elif cell_type == "currency":
             cell_value = cell.attributes.get((OFFICENS, "value"))
             return float(cell_value)
@@ -182,3 +182,28 @@ def _get_cell_value(self, cell, convert_float: bool) -> Scalar:
             return pd.to_datetime(str(cell)).time()
         else:
             raise ValueError(f"Unrecognized type {cell_type}")
+
+    def _get_cell_string_value(self, cell) -> str:
+        """
+        Find and decode OpenDocument text:s tags that represent
+        a run length encoded sequence of space characters.
+        """
+        from odf.element import Text, Element
+        from odf.text import S, P
+        from odf.namespaces import TEXTNS
+
+        text_p = P().qname
+        text_s = S().qname
+
+        p = cell.childNodes[0]
+
+        value = []
+        if p.qname == text_p:
+            for k, fragment in enumerate(p.childNodes):
+                if isinstance(fragment, Text):
+                    value.append(fragment.data)
+                elif isinstance(fragment, Element):
+                    if fragment.qname == text_s:
+                        spaces = int(fragment.attributes.get((TEXTNS, "c"), 1))
+                    value.append(" " * spaces)
+        return "".join(value)
diff --git a/pandas/tests/io/data/excel/test_spaces.ods b/pandas/tests/io/data/excel/test_spaces.ods
diff --git a/pandas/tests/io/data/excel/test_spaces.xls b/pandas/tests/io/data/excel/test_spaces.xls
diff --git a/pandas/tests/io/data/excel/test_spaces.xlsb b/pandas/tests/io/data/excel/test_spaces.xlsb
diff --git a/pandas/tests/io/data/excel/test_spaces.xlsm b/pandas/tests/io/data/excel/test_spaces.xlsm
diff --git a/pandas/tests/io/data/excel/test_spaces.xlsx b/pandas/tests/io/data/excel/test_spaces.xlsx
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
@@ -464,6 +464,24 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
         actual = pd.read_excel(basename + read_ext, dtype=dtype)
         tm.assert_frame_equal(actual, expected)
 
+    def test_reader_spaces(self, read_ext):
+        # see gh-32207
+        basename = "test_spaces"
+
+        actual = pd.read_excel(basename + read_ext)
+        expected = DataFrame(
+            {
+                "testcol": [
+                    "this is great",
+                    "4    spaces",
+                    "1 trailing ",
+                    " 1 leading",
+                    "2  spaces  multiple  times",
+                ]
+            }
+        )
+        tm.assert_frame_equal(actual, expected)
+
     def test_reading_all_sheets(self, read_ext):
         # Test reading all sheetnames by setting sheetname to None,
         # Ensure a dict is returned.