REF: Use more context managers to close files (#45579)

mroeschke · web-flow · commit 991fc275994b · 2022-01-23T20:03:45.000-08:00
* Use more context managers

* Add more context managers

* Context closing for openpyxl workbooks

* Context closing for xlrd objects

* Fix method name

* More closing

* Use mode keyword in open()
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -839,9 +839,8 @@ The simplest case is to just pass in ``parse_dates=True``:
 .. ipython:: python
    :suppress:
 
-   f = open("foo.csv", "w")
-   f.write("date,A,B,C\n20090101,a,1,2\n20090102,b,3,4\n20090103,c,4,5")
-   f.close()
+   with open("foo.csv", mode="w") as f:
+       f.write("date,A,B,C\n20090101,a,1,2\n20090102,b,3,4\n20090103,c,4,5")
 
 .. ipython:: python
 
@@ -1452,16 +1451,15 @@ a different usage of the ``delimiter`` parameter:
 .. ipython:: python
    :suppress:
 
-   f = open("bar.csv", "w")
    data1 = (
        "id8141    360.242940   149.910199   11950.7\n"
        "id1594    444.953632   166.985655   11788.4\n"
        "id1849    364.136849   183.628767   11806.2\n"
        "id1230    413.836124   184.375703   11916.8\n"
        "id1948    502.953953   173.237159   12468.3"
    )
-   f.write(data1)
-   f.close()
+   with open("bar.csv", "w") as f:
+       f.write(data1)
 
 Consider a typical fixed-width data file:
 
@@ -1604,9 +1602,8 @@ of multi-columns indices.
    :suppress:
 
    data = ",a,a,a,b,c,c\n,q,r,s,t,u,v\none,1,2,3,4,5,6\ntwo,7,8,9,10,11,12"
-   fh = open("mi2.csv", "w")
-   fh.write(data)
-   fh.close()
+   with open("mi2.csv", "w") as fh:
+       fh.write(data)
 
 .. ipython:: python
 
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -535,11 +535,16 @@ def load_workbook(self, filepath_or_buffer):
         pass
 
     def close(self) -> None:
-        if hasattr(self, "book") and hasattr(self.book, "close"):
-            # pyxlsb: opens a TemporaryFile
-            # openpyxl: https://stackoverflow.com/questions/31416842/
-            #     openpyxl-does-not-close-excel-workbook-in-read-only-mode
-            self.book.close()
+        if hasattr(self, "book"):
+            if hasattr(self.book, "close"):
+                # pyxlsb: opens a TemporaryFile
+                # openpyxl: https://stackoverflow.com/questions/31416842/
+                #     openpyxl-does-not-close-excel-workbook-in-read-only-mode
+                self.book.close()
+            elif hasattr(self.book, "release_resources"):
+                # xlrd
+                # https://github.com/python-excel/xlrd/blob/2.0.1/xlrd/book.py#L548
+                self.book.release_resources()
         self.handles.close()
 
     @property
@@ -1266,11 +1271,12 @@ def inspect_excel_format(
         elif not peek.startswith(ZIP_SIGNATURE):
             return None
 
-        zf = zipfile.ZipFile(stream)
-
-        # Workaround for some third party files that use forward slashes and
-        # lower case names.
-        component_names = [name.replace("\\", "/").lower() for name in zf.namelist()]
+        with zipfile.ZipFile(stream) as zf:
+            # Workaround for some third party files that use forward slashes and
+            # lower case names.
+            component_names = [
+                name.replace("\\", "/").lower() for name in zf.namelist()
+            ]
 
         if "xl/workbook.xml" in component_names:
             return "xlsx"
diff --git a/pandas/tests/io/excel/test_openpyxl.py b/pandas/tests/io/excel/test_openpyxl.py
@@ -1,3 +1,4 @@
+import contextlib
 from pathlib import Path
 import re
 
@@ -159,12 +160,12 @@ def test_write_append_mode(ext, mode, expected):
         with ExcelWriter(f, engine="openpyxl", mode=mode) as writer:
             df.to_excel(writer, sheet_name="baz", index=False)
 
-        wb2 = openpyxl.load_workbook(f)
-        result = [sheet.title for sheet in wb2.worksheets]
-        assert result == expected
+        with contextlib.closing(openpyxl.load_workbook(f)) as wb2:
+            result = [sheet.title for sheet in wb2.worksheets]
+            assert result == expected
 
-        for index, cell_value in enumerate(expected):
-            assert wb2.worksheets[index]["A1"].value == cell_value
+            for index, cell_value in enumerate(expected):
+                assert wb2.worksheets[index]["A1"].value == cell_value
 
 
 @pytest.mark.parametrize(
@@ -187,15 +188,14 @@ def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected
         ) as writer:
             df2.to_excel(writer, sheet_name="foo", index=False)
 
-        wb = openpyxl.load_workbook(f)
-        assert len(wb.sheetnames) == num_sheets
-        assert wb.sheetnames[0] == "foo"
-        result = pd.read_excel(wb, "foo", engine="openpyxl")
-        assert list(result["fruit"]) == expected
-        if len(wb.sheetnames) == 2:
-            result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
-            tm.assert_frame_equal(result, df2)
-        wb.close()
+        with contextlib.closing(openpyxl.load_workbook(f)) as wb:
+            assert len(wb.sheetnames) == num_sheets
+            assert wb.sheetnames[0] == "foo"
+            result = pd.read_excel(wb, "foo", engine="openpyxl")
+            assert list(result["fruit"]) == expected
+            if len(wb.sheetnames) == 2:
+                result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
+                tm.assert_frame_equal(result, df2)
 
 
 @pytest.mark.parametrize(
@@ -279,9 +279,10 @@ def test_to_excel_with_openpyxl_engine(ext):
 def test_read_workbook(datapath, ext, read_only):
     # GH 39528
     filename = datapath("io", "data", "excel", "test1" + ext)
-    wb = openpyxl.load_workbook(filename, read_only=read_only)
-    result = pd.read_excel(wb, engine="openpyxl")
-    wb.close()
+    with contextlib.closing(
+        openpyxl.load_workbook(filename, read_only=read_only)
+    ) as wb:
+        result = pd.read_excel(wb, engine="openpyxl")
     expected = pd.read_excel(filename)
     tm.assert_frame_equal(result, expected)
 
@@ -313,9 +314,10 @@ def test_read_with_bad_dimension(
     if read_only is None:
         result = pd.read_excel(path, header=header)
     else:
-        wb = openpyxl.load_workbook(path, read_only=read_only)
-        result = pd.read_excel(wb, engine="openpyxl", header=header)
-        wb.close()
+        with contextlib.closing(
+            openpyxl.load_workbook(path, read_only=read_only)
+        ) as wb:
+            result = pd.read_excel(wb, engine="openpyxl", header=header)
     expected = DataFrame(expected_data)
     tm.assert_frame_equal(result, expected)
 
@@ -349,9 +351,10 @@ def test_read_with_empty_trailing_rows(datapath, ext, read_only, request):
     if read_only is None:
         result = pd.read_excel(path)
     else:
-        wb = openpyxl.load_workbook(path, read_only=read_only)
-        result = pd.read_excel(wb, engine="openpyxl")
-        wb.close()
+        with contextlib.closing(
+            openpyxl.load_workbook(path, read_only=read_only)
+        ) as wb:
+            result = pd.read_excel(wb, engine="openpyxl")
     expected = DataFrame(
         {
             "Title": [np.nan, "A", 1, 2, 3],
@@ -370,8 +373,9 @@ def test_read_empty_with_blank_row(datapath, ext, read_only):
     if read_only is None:
         result = pd.read_excel(path)
     else:
-        wb = openpyxl.load_workbook(path, read_only=read_only)
-        result = pd.read_excel(wb, engine="openpyxl")
-        wb.close()
+        with contextlib.closing(
+            openpyxl.load_workbook(path, read_only=read_only)
+        ) as wb:
+            result = pd.read_excel(wb, engine="openpyxl")
     expected = DataFrame()
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/excel/test_style.py b/pandas/tests/io/excel/test_style.py
@@ -1,3 +1,5 @@
+import contextlib
+
 import numpy as np
 import pytest
 
@@ -37,13 +39,13 @@ def test_styler_to_excel_unstyled(engine):
             df.style.to_excel(writer, sheet_name="unstyled")
 
         openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
-        wb = openpyxl.load_workbook(path)
+        with contextlib.closing(openpyxl.load_workbook(path)) as wb:
 
-        for col1, col2 in zip(wb["dataframe"].columns, wb["unstyled"].columns):
-            assert len(col1) == len(col2)
-            for cell1, cell2 in zip(col1, col2):
-                assert cell1.value == cell2.value
-                assert_equal_cell_styles(cell1, cell2)
+            for col1, col2 in zip(wb["dataframe"].columns, wb["unstyled"].columns):
+                assert len(col1) == len(col2)
+                for cell1, cell2 in zip(col1, col2):
+                    assert cell1.value == cell2.value
+                    assert_equal_cell_styles(cell1, cell2)
 
 
 shared_style_params = [
@@ -87,11 +89,11 @@ def test_styler_to_excel_basic(engine, css, attrs, expected):
             styler.to_excel(writer, sheet_name="styled")
 
         openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
-        wb = openpyxl.load_workbook(path)
+        with contextlib.closing(openpyxl.load_workbook(path)) as wb:
 
-        # test unstyled data cell does not have expected styles
-        # test styled cell has expected styles
-        u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2)
+            # test unstyled data cell does not have expected styles
+            # test styled cell has expected styles
+            u_cell, s_cell = wb["dataframe"].cell(2, 2), wb["styled"].cell(2, 2)
         for attr in attrs:
             u_cell, s_cell = getattr(u_cell, attr), getattr(s_cell, attr)
 
@@ -127,12 +129,12 @@ def test_styler_to_excel_basic_indexes(engine, css, attrs, expected):
             styler.to_excel(writer, sheet_name="styled")
 
         openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
-        wb = openpyxl.load_workbook(path)
+        with contextlib.closing(openpyxl.load_workbook(path)) as wb:
 
-        # test null styled index cells does not have expected styles
-        # test styled cell has expected styles
-        ui_cell, si_cell = wb["null_styled"].cell(2, 1), wb["styled"].cell(2, 1)
-        uc_cell, sc_cell = wb["null_styled"].cell(1, 2), wb["styled"].cell(1, 2)
+            # test null styled index cells does not have expected styles
+            # test styled cell has expected styles
+            ui_cell, si_cell = wb["null_styled"].cell(2, 1), wb["styled"].cell(2, 1)
+            uc_cell, sc_cell = wb["null_styled"].cell(1, 2), wb["styled"].cell(1, 2)
         for attr in attrs:
             ui_cell, si_cell = getattr(ui_cell, attr), getattr(si_cell, attr)
             uc_cell, sc_cell = getattr(uc_cell, attr), getattr(sc_cell, attr)
@@ -163,5 +165,5 @@ def custom_converter(css):
                 writer, sheet_name="custom"
             )
 
-        wb = openpyxl.load_workbook(path)
-        assert wb["custom"].cell(2, 2).font.color.value == "00111222"
+        with contextlib.closing(openpyxl.load_workbook(path)) as wb:
+            assert wb["custom"].cell(2, 2).font.color.value == "00111222"
diff --git a/pandas/tests/io/excel/test_xlrd.py b/pandas/tests/io/excel/test_xlrd.py
@@ -45,13 +45,14 @@ def test_read_xlrd_book(read_ext_xlrd, frame):
 
     with tm.ensure_clean(read_ext_xlrd) as pth:
         df.to_excel(pth, sheet_name)
-        book = xlrd.open_workbook(pth)
-
-        with ExcelFile(book, engine=engine) as xl:
-            result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0)
-            tm.assert_frame_equal(df, result)
-
-        result = pd.read_excel(book, sheet_name=sheet_name, engine=engine, index_col=0)
+        with xlrd.open_workbook(pth) as book:
+            with ExcelFile(book, engine=engine) as xl:
+                result = pd.read_excel(xl, sheet_name=sheet_name, index_col=0)
+                tm.assert_frame_equal(df, result)
+
+            result = pd.read_excel(
+                book, sheet_name=sheet_name, engine=engine, index_col=0
+            )
         tm.assert_frame_equal(df, result)
 
 
diff --git a/pandas/tests/io/excel/test_xlsxwriter.py b/pandas/tests/io/excel/test_xlsxwriter.py
@@ -1,3 +1,4 @@
+import contextlib
 import re
 import warnings
 
@@ -34,12 +35,12 @@ def test_column_format(ext):
             col_format = write_workbook.add_format({"num_format": num_format})
             write_worksheet.set_column("B:B", None, col_format)
 
-        read_workbook = openpyxl.load_workbook(path)
-        try:
-            read_worksheet = read_workbook["Sheet1"]
-        except TypeError:
-            # compat
-            read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1")
+        with contextlib.closing(openpyxl.load_workbook(path)) as read_workbook:
+            try:
+                read_worksheet = read_workbook["Sheet1"]
+            except TypeError:
+                # compat
+                read_worksheet = read_workbook.get_sheet_by_name(name="Sheet1")
 
         # Get the number format from the cell.
         try:
diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
@@ -592,11 +592,9 @@ def test_file_handles_mmap(c_parser_only, csv1):
     parser = c_parser_only
 
     with open(csv1) as f:
-        m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
-        parser.read_csv(m)
-
-        assert not m.closed
-        m.close()
+        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m:
+            parser.read_csv(m)
+            assert not m.closed
 
 
 def test_file_binary_mode(c_parser_only):
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
@@ -167,9 +167,8 @@ def test_decompression_regex_sep(python_parser_only, csv1, compression, klass):
     klass = getattr(module, klass)
 
     with tm.ensure_clean() as path:
-        tmp = klass(path, mode="wb")
-        tmp.write(data)
-        tmp.close()
+        with klass(path, mode="wb") as tmp:
+            tmp.write(data)
 
         result = parser.read_csv(path, sep="::", compression=compression)
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
@@ -1,3 +1,4 @@
+import contextlib
 from datetime import datetime
 import io
 import os
@@ -135,9 +136,8 @@ def test_encoding_options(datapath):
 
     from pandas.io.sas.sas7bdat import SAS7BDATReader
 
-    rdr = SAS7BDATReader(fname, convert_header_text=False)
-    df3 = rdr.read()
-    rdr.close()
+    with contextlib.closing(SAS7BDATReader(fname, convert_header_text=False)) as rdr:
+        df3 = rdr.read()
     for x, y in zip(df1.columns, df3.columns):
         assert x == y.decode()
 
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
@@ -415,8 +415,8 @@ def test_constructor_bad_file(self, mmap_file):
         with pytest.raises(err, match=msg):
             icom._MMapWrapper(non_file)
 
-        target = open(mmap_file)
-        target.close()
+        with open(mmap_file) as target:
+            pass
 
         msg = "I/O operation on closed file"
         with pytest.raises(ValueError, match=msg):