TST: Add test cases for note reading/writing (pandas-dev#58070)

diogomsmiranda · Dacops · Dacops · commit cf75c28f489b · 2024-05-25T01:08:18.000Z
Co-authored-by: Dacops <david.c.pires@tecnico.ulisboa.pt>
diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py
@@ -117,7 +117,7 @@ def get_sheet_data(
         if notes is not None:
             raise NotImplementedError(
                 """
-                Notes are not supported in odfreader engine,
+                Notes are not supported by the odfreader engine,
                 see https://github.com/eea/odfpy
                 """
             )
diff --git a/pandas/tests/io/data/excel/test_read_notes.xls b/pandas/tests/io/data/excel/test_read_notes.xls
diff --git a/pandas/tests/io/data/excel/test_read_notes.xlsx b/pandas/tests/io/data/excel/test_read_notes.xlsx
diff --git a/pandas/tests/io/excel/test_odf.py b/pandas/tests/io/excel/test_odf.py
@@ -70,3 +70,15 @@ def test_read_cell_annotation():
     result = pd.read_excel("test_cell_annotation.ods")
 
     tm.assert_frame_equal(result, expected)
+
+
+def test_exception_read_with_notes():
+    """Passing ``notes`` to read_excel for an .ods file must raise."""
+    # Build the argument outside the ``raises`` block so the block contains
+    # only the call under test.
+    df_notes = pd.DataFrame()
+    with pytest.raises(
+        NotImplementedError,
+        match="""
+                Notes are not supported by the odfreader engine,
+                see https://github.com/eea/odfpy
+                """,
+    ):
+        pd.read_excel("test_unempty_cells.ods", notes=df_notes)
diff --git a/pandas/tests/io/excel/test_odswriter.py b/pandas/tests/io/excel/test_odswriter.py
@@ -104,3 +104,18 @@ def test_cell_value_type(
         cell = sheet_cells[0]
         assert cell.attributes.get((OFFICENS, "value-type")) == cell_value_type
         assert cell.attributes.get((OFFICENS, cell_value_attribute)) == cell_value
+
+
+def test_exception_write_with_notes(tmp_excel):
+    """Writing a Styler that carries tooltips must raise for odswriter."""
+    # Build the inputs outside the ``raises`` block so the block contains
+    # only the call under test.
+    notes = pd.DataFrame([["note 1", "note 2"], ["", "note 4"], ["note 5", ""]])
+    df = pd.DataFrame([[1, 2], [3, 4], [5, 6]])
+
+    with pytest.raises(
+        NotImplementedError,
+        match="""
+                Notes are not supported by the odswriter engine,
+                see https://github.com/mmulqueen/odswriter
+                """,
+    ):
+        df.style.set_tooltips(notes).to_excel(tmp_excel)
diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
@@ -429,3 +429,12 @@ def test_read_multiindex_header_no_index_names(datapath, ext):
         index=pd.MultiIndex.from_tuples([("A", "AA", "AAA"), ("A", "BB", "BBB")]),
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_read_notes_from_xlsx_files(datapath, ext):
+    """Read the cell notes of the ``test_read_notes`` data file via openpyxl."""
+    # ``notes`` is used as an out-parameter: an empty frame is passed in and
+    # inspected afterwards, while the return value of read_excel is ignored.
+    notes_read = DataFrame()
+    pd.read_excel(
+        datapath("io", "data", "excel", f"test_read_notes{ext}"),
+        engine="openpyxl",
+        notes=notes_read,
+    )
+
+    notes_expected = DataFrame(
+        [
+            ["note 1", "note 2"],
+            ["", "note 4"],
+            ["note 5", ""],
+        ]
+    )
+    tm.assert_frame_equal(notes_read, notes_expected)
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
@@ -329,6 +329,153 @@ def test_multiindex_interval_datetimes(self, tmp_excel):
         )
         tm.assert_frame_equal(result, expected)
 
+    def test_read_write_with_notes(self, tmp_excel, ext):
+        """Write a full grid of notes via ``set_tooltips`` and read it back."""
+        # Accept ``ext`` with or without a leading dot, and skip explicitly
+        # rather than passing silently for other formats.
+        if ext.lstrip(".") not in ("xlsm", "xlsx"):
+            pytest.skip(f"notes are only round-tripped for xlsx/xlsm, not {ext}")
+        expected = DataFrame(
+            [
+                ["note 1", "note 2", "note 3"],
+                ["note 1", "note 2", "note 3"],
+                ["note 1", "note 2", "note 3"],
+            ]
+        )
+        df = DataFrame(
+            [
+                [1, 100, 200],
+                [2, 200, 300],
+                [3, 300, 400],
+            ]
+        )
+        df.style.set_tooltips(expected).to_excel(tmp_excel)
+        # Keep the frame handed to ``notes``: rebinding ``result`` to the return
+        # value of read_excel would compare that value instead of the notes.
+        # NOTE(review): assumes read_excel fills ``notes`` in place - confirm.
+        result = DataFrame()
+        pd.read_excel(tmp_excel, notes=result)
+        tm.assert_frame_equal(result, expected)
+
+    def test_read_write_with_notes_trim_rows(self, tmp_excel, ext):
+        """Round-trip notes where only the middle row carries any text."""
+        # Accept ``ext`` with or without a leading dot, and skip explicitly
+        # rather than passing silently for other formats.
+        if ext.lstrip(".") not in ("xlsm", "xlsx"):
+            pytest.skip(f"notes are only round-tripped for xlsx/xlsm, not {ext}")
+        expected = DataFrame(
+            [
+                ["", "", ""],
+                ["note 1", "note 2", "note3"],
+                ["", "", ""],
+            ]
+        )
+        df = DataFrame(
+            [
+                [1, 100, 200],
+                [2, 200, 300],
+                [3, 300, 400],
+            ]
+        )
+        df.style.set_tooltips(expected).to_excel(tmp_excel)
+        # Keep the frame handed to ``notes``: rebinding ``result`` to the return
+        # value of read_excel would compare that value instead of the notes.
+        # NOTE(review): assumes read_excel fills ``notes`` in place - confirm.
+        result = DataFrame()
+        pd.read_excel(tmp_excel, notes=result)
+        tm.assert_frame_equal(result, expected)
+
+    def test_read_write_with_notes_trim_columns(self, tmp_excel, ext):
+        """Round-trip notes where only the middle column carries any text."""
+        # Accept ``ext`` with or without a leading dot, and skip explicitly
+        # rather than passing silently for other formats.
+        if ext.lstrip(".") not in ("xlsm", "xlsx"):
+            pytest.skip(f"notes are only round-tripped for xlsx/xlsm, not {ext}")
+        expected = DataFrame(
+            [
+                ["", "note 2", ""],
+                ["", "note 2", ""],
+                ["", "note 2", ""],
+            ]
+        )
+        df = DataFrame(
+            [
+                [1, 100, 200],
+                [2, 200, 300],
+                [3, 300, 400],
+            ]
+        )
+        df.style.set_tooltips(expected).to_excel(tmp_excel)
+        # Keep the frame handed to ``notes``: rebinding ``result`` to the return
+        # value of read_excel would compare that value instead of the notes.
+        # NOTE(review): assumes read_excel fills ``notes`` in place - confirm.
+        result = DataFrame()
+        pd.read_excel(tmp_excel, notes=result)
+        tm.assert_frame_equal(result, expected)
+
+    def test_read_write_with_notes_trim_rows_and_columns(self, tmp_excel, ext):
+        """Round-trip notes where only the centre cell carries any text."""
+        # Accept ``ext`` with or without a leading dot, and skip explicitly
+        # rather than passing silently for other formats.
+        if ext.lstrip(".") not in ("xlsm", "xlsx"):
+            pytest.skip(f"notes are only round-tripped for xlsx/xlsm, not {ext}")
+        expected = DataFrame(
+            [
+                ["", "", ""],
+                ["", "note 2", ""],
+                ["", "", ""],
+            ]
+        )
+        df = DataFrame(
+            [
+                [1, 100, 200],
+                [2, 200, 300],
+                [3, 300, 400],
+            ]
+        )
+        df.style.set_tooltips(expected).to_excel(tmp_excel)
+        # Keep the frame handed to ``notes``: rebinding ``result`` to the return
+        # value of read_excel would compare that value instead of the notes.
+        # NOTE(review): assumes read_excel fills ``notes`` in place - confirm.
+        result = DataFrame()
+        pd.read_excel(tmp_excel, notes=result)
+        tm.assert_frame_equal(result, expected)
+
+    def test_read_write_with_notes_empty_comments_no_trim(self, tmp_excel, ext):
+        """Round-trip notes placed only in the two opposite corner cells."""
+        # Accept ``ext`` with or without a leading dot, and skip explicitly
+        # rather than passing silently for other formats.
+        if ext.lstrip(".") not in ("xlsm", "xlsx"):
+            pytest.skip(f"notes are only round-tripped for xlsx/xlsm, not {ext}")
+        expected = DataFrame(
+            [
+                ["note 1", "", ""],
+                ["", "", ""],
+                ["", "", "note 3"],
+            ]
+        )
+        df = DataFrame(
+            [
+                [1, 100, 200],
+                [2, 200, 300],
+                [3, 300, 400],
+            ]
+        )
+        df.style.set_tooltips(expected).to_excel(tmp_excel)
+        # Keep the frame handed to ``notes``: rebinding ``result`` to the return
+        # value of read_excel would compare that value instead of the notes.
+        # NOTE(review): assumes read_excel fills ``notes`` in place - confirm.
+        result = DataFrame()
+        pd.read_excel(tmp_excel, notes=result)
+        tm.assert_frame_equal(result, expected)
+
+    def test_read_write_with_notes_smaller_dimensions(self, tmp_excel, ext):
+        """Round-trip a 2x2 notes frame attached to a 3x3 data frame."""
+        # Accept ``ext`` with or without a leading dot, and skip explicitly
+        # rather than passing silently for other formats.
+        if ext.lstrip(".") not in ("xlsm", "xlsx"):
+            pytest.skip(f"notes are only round-tripped for xlsx/xlsm, not {ext}")
+        expected = DataFrame(
+            [
+                ["note 1", "note 2"],
+                ["note 1", "note 2"],
+            ]
+        )
+        df = DataFrame(
+            [
+                [1, 100, 200],
+                [2, 200, 300],
+                [3, 300, 400],
+            ]
+        )
+        df.style.set_tooltips(expected).to_excel(tmp_excel)
+        # Keep the frame handed to ``notes``: rebinding ``result`` to the return
+        # value of read_excel would compare that value instead of the notes.
+        # NOTE(review): assumes read_excel fills ``notes`` in place - confirm.
+        result = DataFrame()
+        pd.read_excel(tmp_excel, notes=result)
+        tm.assert_frame_equal(result, expected)
+
+    def test_read_write_with_notes_bigger_dimensions(self, tmp_excel, ext):
+        """Round-trip a 4x4 notes frame attached to a 3x3 data frame."""
+        # Accept ``ext`` with or without a leading dot, and skip explicitly
+        # rather than passing silently for other formats.
+        if ext.lstrip(".") not in ("xlsm", "xlsx"):
+            pytest.skip(f"notes are only round-tripped for xlsx/xlsm, not {ext}")
+        expected = DataFrame(
+            [
+                ["note 1", "note 2", "note 3", "note 4"],
+                ["note 1", "note 2", "note 3", "note 4"],
+                ["note 1", "note 2", "note 3", "note 4"],
+                ["note 1", "note 2", "note 3", "note 4"],
+            ]
+        )
+        df = DataFrame(
+            [
+                [1, 100, 200],
+                [2, 200, 300],
+                [3, 300, 400],
+            ]
+        )
+        df.style.set_tooltips(expected).to_excel(tmp_excel)
+        # Keep the frame handed to ``notes``: rebinding ``result`` to the return
+        # value of read_excel would compare that value instead of the notes.
+        # NOTE(review): assumes read_excel fills ``notes`` in place - confirm.
+        result = DataFrame()
+        pd.read_excel(tmp_excel, notes=result)
+        tm.assert_frame_equal(result, expected)
+
 
 @pytest.mark.parametrize(
     "engine,ext",
diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
@@ -69,3 +69,17 @@ def test_read_old_xls_files(file_header):
     # GH 41226
     f = io.BytesIO(file_header)
     assert inspect_excel_format(f) == "xls"
+
+
+def test_read_notes_from_xls_files(datapath, read_ext_xlrd):
+    """Read the cell notes of the ``test_read_notes`` data file via xlrd."""
+    notes_path = datapath("io", "data", "excel", f"test_read_notes{read_ext_xlrd}")
+    notes_read = pd.DataFrame()
+    pd.read_excel(notes_path, engine="xlrd", notes=notes_read)
+
+    # Expect a 10x10 grid of empty strings with notes only in the two corner
+    # cells, i.e. 0-based positions (0, 0) and (9, 9).
+    notes_expected = pd.DataFrame("", index=range(10), columns=range(10))
+    notes_expected.iloc[0, 0] = "note 1x1"
+    notes_expected.iloc[9, 9] = "note 10x10"
+
+    tm.assert_frame_equal(notes_read, notes_expected)
+    
diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py
@@ -7,6 +7,10 @@
 
 from pandas.io.excel import ExcelWriter
 
+import pandas as pd
+
+import pandas._testing as tm
+
 xlsxwriter = pytest.importorskip("xlsxwriter")
 
 

Original file line number	Diff line number	Diff line change
`@@ -117,7 +117,7 @@ def get_sheet_data(`
`117`	`117`	`if notes is not None:`
`118`	`118`	`raise NotImplementedError(`
`119`	`119`	`"""`
`120`		`- Notes are not supported in odfreader engine,`
	`120`	`+ Notes are not supported by the odfreader engine,`
`121`	`121`	`see https://github.com/eea/odfpy`
`122`	`122`	`"""`
`123`	`123`	`)`