From 5b482fed8b04b6296d39893dab66e4fe65529978 Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Fri, 28 Apr 2023 10:42:20 +0530
Subject: [PATCH 01/17] Changes Confirmed

---
 .pre-commit-config.yaml => b/.pre-commit-config.yaml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)
 rename .pre-commit-config.yaml => b/.pre-commit-config.yaml (98%)

diff --git a/.pre-commit-config.yaml b/b/.pre-commit-config.yaml
similarity index 98%
rename from .pre-commit-config.yaml
rename to b/.pre-commit-config.yaml
index 43b3699907325..fae32b7b01485 100644
--- a/.pre-commit-config.yaml
+++ b/b/.pre-commit-config.yaml
@@ -83,9 +83,6 @@ repos:
     hooks:
     -   id: pylint
         stages: [manual]
--   repo: https://github.com/pycqa/pylint
-    rev: v2.16.2
-    hooks:
     -   id: pylint
         alias: redefined-outer-name
         name: Redefining name from outer scope
@@ -99,6 +96,11 @@ repos:
             |^pandas/conftest\.py  # keep excluded
         args: [--disable=all, --enable=redefined-outer-name]
         stages: [manual]
+    -       id: pylint
+            alias: unspecified-encoding
+            name: Using open without explicitly specifying an encoding
+            args: [--disable=all, --enable=unspecified-encoding]
+            stages: [manual]
 -   repo: https://github.com/PyCQA/isort
     rev: 5.12.0
     hooks:

From c20ac6d72093019999c42c56eb1650adc90f750e Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sat, 29 Apr 2023 14:30:48 +0530
Subject: [PATCH 02/17] Encoding Completed

---
 .pre-commit-config.yaml                      | 432 +++++++++++++++++++
 scripts/check_test_naming.py                 |   2 +-
 scripts/validate_rst_title_capitalization.py |   2 +-
 3 files changed, 434 insertions(+), 2 deletions(-)
 create mode 100644 .pre-commit-config.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000000..fae32b7b01485
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,432 @@
+minimum_pre_commit_version: 2.15.0
+exclude: ^LICENSES/|\.(html|csv|svg)$
+# reserve "manual" for relatively slow hooks which we still want to run in CI
+default_stages: [
+    commit,
+    merge-commit,
+    push,
+    prepare-commit-msg,
+    commit-msg,
+    post-checkout,
+    post-commit,
+    post-merge,
+    post-rewrite
+]
+ci:
+    autofix_prs: false
+repos:
+-   repo: local
+    hooks:
+    # NOTE: we make `black` a local hook because if it's installed from
+    # PyPI (rather than from source) then it'll run twice as fast thanks to mypyc
+    -   id: black
+        name: black
+        description: "Black: The uncompromising Python code formatter"
+        entry: black
+        language: python
+        require_serial: true
+        types_or: [python, pyi]
+        additional_dependencies: [black==23.1.0]
+-   repo: https://github.com/charliermarsh/ruff-pre-commit
+    rev: v0.0.259
+    hooks:
+    -   id: ruff
+        args: [--exit-non-zero-on-fix]
+-   repo: https://github.com/jendrikseipp/vulture
+    rev: 'v2.7'
+    hooks:
+      - id: vulture
+        entry: python scripts/run_vulture.py
+        pass_filenames: true
+        require_serial: false
+-   repo: https://github.com/codespell-project/codespell
+    rev: v2.2.2
+    hooks:
+    -   id: codespell
+        types_or: [python, rst, markdown, cython, c]
+        additional_dependencies: [tomli]
+-   repo: https://github.com/MarcoGorelli/cython-lint
+    rev: v0.12.5
+    hooks:
+    -   id: cython-lint
+    -   id: double-quote-cython-strings
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+    -   id: debug-statements
+    -   id: end-of-file-fixer
+        exclude: \.txt$
+        stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg,
+                 post-checkout, post-commit, post-merge, post-rewrite]
+    -   id: trailing-whitespace
+        stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg,
+                 post-checkout, post-commit, post-merge, post-rewrite]
+-   repo: https://github.com/cpplint/cpplint
+    rev: 1.6.1
+    hooks:
+    -   id: cpplint
+        # We don't lint all C files because we don't want to lint any that are built
+        # from Cython files nor do we want to lint C files that we didn't modify for
+        # this particular codebase (e.g. src/headers, src/klib). However,
+        # we can lint all header files since they aren't "generated" like C files are.
+        exclude: ^pandas/_libs/src/(klib|headers)/
+        args: [
+            --quiet,
+            '--extensions=c,h',
+            '--headers=h',
+            --recursive,
+            --linelength=88,
+            '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size'
+        ]
+-   repo: https://github.com/pycqa/pylint
+    rev: v2.16.2
+    hooks:
+    -   id: pylint
+        stages: [manual]
+    -   id: pylint
+        alias: redefined-outer-name
+        name: Redefining name from outer scope
+        files: ^pandas/
+        exclude: |
+            (?x)
+            ^pandas/tests  # keep excluded
+            |/_testing/  # keep excluded
+            |^pandas/util/_test_decorators\.py  # keep excluded
+            |^pandas/_version\.py  # keep excluded
+            |^pandas/conftest\.py  # keep excluded
+        args: [--disable=all, --enable=redefined-outer-name]
+        stages: [manual]
+    -       id: pylint
+            alias: unspecified-encoding
+            name: Using open without explicitly specifying an encoding
+            args: [--disable=all, --enable=unspecified-encoding]
+            stages: [manual]
+-   repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+    -   id: isort
+-   repo: https://github.com/asottile/pyupgrade
+    rev: v3.3.1
+    hooks:
+    -   id: pyupgrade
+        args: [--py38-plus]
+-   repo: https://github.com/pre-commit/pygrep-hooks
+    rev: v1.10.0
+    hooks:
+      - id: rst-backticks
+      - id: rst-directive-colons
+        types: [text]  # overwrite types: [rst]
+        types_or: [python, rst]
+      - id: rst-inline-touching-normal
+        types: [text]  # overwrite types: [rst]
+        types_or: [python, rst]
+-   repo: https://github.com/sphinx-contrib/sphinx-lint
+    rev: v0.6.7
+    hooks:
+    - id: sphinx-lint
+-   repo: local
+    hooks:
+    -   id: pyright
+        # note: assumes python env is setup and activated
+        name: pyright
+        entry: pyright
+        language: node
+        pass_filenames: false
+        types: [python]
+        stages: [manual]
+        additional_dependencies: &pyright_dependencies
+        - pyright@1.1.292
+    -   id: pyright_reportGeneralTypeIssues
+        # note: assumes python env is setup and activated
+        name: pyright reportGeneralTypeIssues
+        entry: pyright --skipunannotated -p pyright_reportGeneralTypeIssues.json --level warning
+        language: node
+        pass_filenames: false
+        types: [python]
+        stages: [manual]
+        additional_dependencies: *pyright_dependencies
+    -   id: mypy
+        # note: assumes python env is setup and activated
+        name: mypy
+        entry: mypy
+        language: system
+        pass_filenames: false
+        types: [python]
+        stages: [manual]
+    -   id: stubtest
+        # note: assumes python env is setup and activated
+        # note: requires pandas dev to be installed
+        name: mypy (stubtest)
+        entry: python
+        language: system
+        pass_filenames: false
+        types: [pyi]
+        args: [scripts/run_stubtest.py]
+        stages: [manual]
+    -   id: inconsistent-namespace-usage
+        name: 'Check for inconsistent use of pandas namespace'
+        entry: python scripts/check_for_inconsistent_pandas_namespace.py
+        exclude: ^pandas/core/interchange/
+        language: python
+        types: [python]
+    -   id: no-os-remove
+        name: Check code for instances of os.remove
+        entry: os\.remove
+        language: pygrep
+        types: [python]
+        files: ^pandas/tests/
+        exclude: |
+            (?x)^
+            pandas/tests/io/pytables/test_store\.py$
+    -   id: unwanted-patterns
+        name: Unwanted patterns
+        language: pygrep
+        entry: |
+            (?x)
+            # outdated annotation syntax, missing error codes
+            \#\ type:\ (?!ignore)
+            |\#\ type:\s?ignore(?!\[)
+
+            # foo._class__ instead of type(foo)
+            |\.__class__
+
+            # np.bool/np.object instead of np.bool_/np.object_
+            |np\.bool[^_8`]
+            |np\.object[^_8`]
+
+            # imports from collections.abc instead of `from collections import abc`
+            |from\ collections\.abc\ import
+
+            # Numpy
+            |from\ numpy\ import\ random
+            |from\ numpy\.random\ import
+
+            # Incorrect code-block / IPython directives
+            |\.\.\ code-block\ ::
+            |\.\.\ ipython\ ::
+            # directive should not have a space before ::
+            |\.\.\ \w+\ ::
+
+            # Check for deprecated messages without sphinx directive
+            |(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)
+
+            # {foo!r} instead of {repr(foo)}
+            |!r}
+
+            # builtin filter function
+            |(?<!def)[\(\s]filter\(
+
+            # exec
+            |[^a-zA-Z0-9_]exec\(
+        types_or: [python, cython, rst]
+        exclude: ^doc/source/development/code_style\.rst  # contains examples of patterns to avoid
+    -   id: cython-casting
+        name: Check Cython casting is `<type>obj`, not `<type> obj`
+        language: pygrep
+        entry: '[a-zA-Z0-9*]> '
+        files: (\.pyx|\.pxi.in)$
+    -   id: incorrect-backticks
+        name: Check for backticks incorrectly rendering because of missing spaces
+        language: pygrep
+        entry: '[a-zA-Z0-9]\`\`?[a-zA-Z0-9]'
+        types: [rst]
+        files: ^doc/source/
+    -   id: seed-check-asv
+        name: Check for unnecessary random seeds in asv benchmarks
+        language: pygrep
+        entry: 'np\.random\.seed'
+        files: ^asv_bench/benchmarks
+        exclude: ^asv_bench/benchmarks/pandas_vb_common\.py
+    -   id: np-testing-array-equal
+        name: Check for usage of numpy testing or array_equal
+        language: pygrep
+        entry: '(numpy|np)(\.testing|\.array_equal)'
+        files: ^pandas/tests/
+        types: [python]
+    -   id: invalid-ea-testing
+        name: Check for invalid EA testing
+        language: pygrep
+        entry: 'tm\.assert_(series|frame)_equal'
+        files: ^pandas/tests/extension/base
+        types: [python]
+        exclude: ^pandas/tests/extension/base/base\.py
+    -   id: unwanted-patterns-in-tests
+        name: Unwanted patterns in tests
+        language: pygrep
+        entry: |
+            (?x)
+            # pytest.xfail instead of pytest.mark.xfail
+            pytest\.xfail
+
+            # imports from pandas._testing instead of `import pandas._testing as tm`
+            |from\ pandas\._testing\ import
+            |from\ pandas\ import\ _testing\ as\ tm
+
+            # No direct imports from conftest
+            |conftest\ import
+            |import\ conftest
+
+            # pandas.testing instead of tm
+            |pd\.testing\.
+
+            # pd.api.types instead of from pandas.api.types import ...
+            |(pd|pandas)\.api\.types\.
+
+            # np.testing, np.array_equal
+            |(numpy|np)(\.testing|\.array_equal)
+
+            # unittest.mock (use pytest builtin monkeypatch fixture instead)
+            |(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)
+
+            # pytest raises without context
+            |\s\ pytest.raises
+
+            # pytest.warns (use tm.assert_produces_warning instead)
+            |pytest\.warns
+        files: ^pandas/tests/
+        types_or: [python, cython, rst]
+    -   id: unwanted-patterns-in-ea-tests
+        name: Unwanted patterns in EA tests
+        language: pygrep
+        entry: |
+            (?x)
+            tm.assert_(series|frame)_equal
+        files: ^pandas/tests/extension/base/
+        exclude: ^pandas/tests/extension/base/base\.py$
+        types_or: [python, cython, rst]
+    -   id: unwanted-patterns-in-cython
+        name: Unwanted patterns in Cython code
+        language: pygrep
+        entry: |
+            (?x)
+            # `<type>obj` as opposed to `<type> obj`
+            [a-zA-Z0-9*]>[ ]
+        types: [cython]
+    -   id: pip-to-conda
+        name: Generate pip dependency from conda
+        language: python
+        entry: python scripts/generate_pip_deps_from_conda.py
+        files: ^(environment.yml|requirements-dev.txt)$
+        pass_filenames: false
+        additional_dependencies: [tomli, pyyaml]
+    -   id: title-capitalization
+        name: Validate correct capitalization among titles in documentation
+        entry: python scripts/validate_rst_title_capitalization.py
+        language: python
+        types: [rst]
+        files: ^doc/source/(development|reference)/
+    -   id: unwanted-patterns-bare-pytest-raises
+        name: Check for use of bare pytest raises
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises"
+        types: [python]
+        files: ^pandas/tests/
+        exclude: ^pandas/tests/extension/
+    -   id: unwanted-patterns-private-function-across-module
+        name: Check for use of private functions across modules
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
+        types: [python]
+        exclude: ^(asv_bench|pandas/tests|doc)/
+    -   id: unwanted-patterns-private-import-across-module
+        name: Check for import of private attributes across modules
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
+        types: [python]
+        exclude: |
+            (?x)
+            ^(asv_bench|pandas/tests|doc)/
+            |scripts/validate_min_versions_in_sync\.py$
+    -   id: unwanted-patterns-strings-with-misplaced-whitespace
+        name: Check for strings with misplaced spaces
+        language: python
+        entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
+        types_or: [python, cython]
+    -   id: use-pd_array-in-core
+        name: Import pandas.array as pd_array in core
+        language: python
+        entry: python scripts/use_pd_array_in_core.py
+        files: ^pandas/core/
+        exclude: ^pandas/core/api\.py$
+        types: [python]
+    -   id: use-io-common-urlopen
+        name: Use pandas.io.common.urlopen instead of urllib.request.urlopen
+        language: python
+        entry: python scripts/use_io_common_urlopen.py
+        files: ^pandas/
+        exclude: ^pandas/tests/
+        types: [python]
+    -   id: no-bool-in-core-generic
+        name: Use bool_t instead of bool in pandas/core/generic.py
+        entry: python scripts/no_bool_in_generic.py
+        language: python
+        files: ^pandas/core/generic\.py$
+    -   id: no-return-exception
+        name: Use raise instead of return for exceptions
+        language: pygrep
+        entry: 'return [A-Za-z]+(Error|Exit|Interrupt|Exception|Iteration)'
+        files: ^pandas/
+        types: [python]
+        exclude: ^pandas/tests/
+    -   id: pandas-errors-documented
+        name: Ensure pandas errors are documented in doc/source/reference/testing.rst
+        entry: python scripts/pandas_errors_documented.py
+        language: python
+        files: ^pandas/errors/__init__.py$
+    -   id: pg8000-not-installed-CI
+        name: Check for pg8000 not installed on CI for test_pg8000_sqlalchemy_passthrough_error
+        language: pygrep
+        entry: 'pg8000'
+        files: ^ci/deps
+        types: [yaml]
+    -   id: validate-min-versions-in-sync
+        name: Check minimum version of dependencies are aligned
+        entry: python -m scripts.validate_min_versions_in_sync
+        language: python
+        files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$
+        additional_dependencies: [tomli, pyyaml]
+        pass_filenames: false
+    -   id: validate-errors-locations
+        name: Validate errors locations
+        description: Validate errors are in appropriate locations.
+        entry: python scripts/validate_exception_location.py
+        language: python
+        files: ^pandas/
+        exclude: ^(pandas/_libs/|pandas/tests/|pandas/errors/__init__.py$|pandas/_version.py)
+        types: [python]
+    -   id: future-annotations
+        name: import annotations from __future__
+        entry: 'from __future__ import annotations'
+        language: pygrep
+        args: [--negate]
+        files: ^pandas/
+        types: [python]
+        exclude: |
+            (?x)
+            /(__init__\.py)|(api\.py)|(_version\.py)|(testing\.py)|(conftest\.py)$
+            |/tests/
+            |/_testing/
+    -   id: autotyping
+        name: autotyping
+        entry: python -m scripts.run_autotyping
+        types_or: [python, pyi]
+        files: ^pandas
+        exclude: ^(pandas/tests|pandas/_version.py|pandas/io/clipboard)
+        language: python
+        stages: [manual]
+        additional_dependencies:
+        - autotyping==23.3.0
+        - libcst==0.4.9
+    -   id: check-test-naming
+        name: check that test names start with 'test'
+        entry: python -m scripts.check_test_naming
+        types: [python]
+        files: ^pandas/tests
+        language: python
+    -   id: sort-whatsnew-items
+        name: sort whatsnew entries alphabetically
+        entry: python -m scripts.sort_whatsnew_note
+        types: [rst]
+        language: python
+        files: ^doc/source/whatsnew/v
+        exclude: ^doc/source/whatsnew/v(0|1|2\.0\.0)
diff --git a/scripts/check_test_naming.py b/scripts/check_test_naming.py
index 33890feb8692d..158cf46f264c2 100644
--- a/scripts/check_test_naming.py
+++ b/scripts/check_test_naming.py
@@ -118,7 +118,7 @@ def main(content: str, file: str) -> int:
                     assert isinstance(_node, ast.FunctionDef)  # help mypy
                     should_continue = False
                     for _file in (Path("pandas") / "tests").rglob("*.py"):
-                        with open(os.path.join(_file)) as fd:
+                        with open(os.path.join(_file), encoding="utf-8") as fd:
                             _content = fd.read()
                         if f"self.{_node.name}" in _content:
                             should_continue = True
diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py
index 4446ed62f6b8a..0f4c11eb30b07 100755
--- a/scripts/validate_rst_title_capitalization.py
+++ b/scripts/validate_rst_title_capitalization.py
@@ -226,7 +226,7 @@ def find_titles(rst_file: str) -> Iterable[tuple[str, int]]:
         The corresponding line number of the heading.
     """
 
-    with open(rst_file) as fd:
+    with open(rst_file, encoding="utf-8") as fd:
         previous_line = ""
         for i, line in enumerate(fd):
             line_no_last_elem = line[:-1]

From 682fdf836dbb96b55c2ba4f90558eef608c6700b Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sat, 29 Apr 2023 15:25:59 +0530
Subject: [PATCH 03/17] Spaces Are Completed

---
 .pre-commit-config.yaml   | 10 +++++-----
 b/.pre-commit-config.yaml | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fae32b7b01485..34cd91940f014 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -96,11 +96,11 @@ repos:
             |^pandas/conftest\.py  # keep excluded
         args: [--disable=all, --enable=redefined-outer-name]
         stages: [manual]
-    -       id: pylint
-            alias: unspecified-encoding
-            name: Using open without explicitly specifying an encoding
-            args: [--disable=all, --enable=unspecified-encoding]
-            stages: [manual]
+    -   id: pylint
+        alias: unspecified-encoding
+        name: Using open without explicitly specifying an encoding
+        args: [--disable=all, --enable=unspecified-encoding]
+        stages: [manual]
 -   repo: https://github.com/PyCQA/isort
     rev: 5.12.0
     hooks:
diff --git a/b/.pre-commit-config.yaml b/b/.pre-commit-config.yaml
index fae32b7b01485..34cd91940f014 100644
--- a/b/.pre-commit-config.yaml
+++ b/b/.pre-commit-config.yaml
@@ -96,11 +96,11 @@ repos:
             |^pandas/conftest\.py  # keep excluded
         args: [--disable=all, --enable=redefined-outer-name]
         stages: [manual]
-    -       id: pylint
-            alias: unspecified-encoding
-            name: Using open without explicitly specifying an encoding
-            args: [--disable=all, --enable=unspecified-encoding]
-            stages: [manual]
+    -   id: pylint
+        alias: unspecified-encoding
+        name: Using open without explicitly specifying an encoding
+        args: [--disable=all, --enable=unspecified-encoding]
+        stages: [manual]
 -   repo: https://github.com/PyCQA/isort
     rev: 5.12.0
     hooks:

From a5fbd475b7f5ef2312d5bb51372049eb914e80ff Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sat, 29 Apr 2023 17:31:14 +0530
Subject: [PATCH 04/17] Pre-commit manually completed

---
 asv_bench/benchmarks/io/csv.py                |  2 +-
 doc/make.py                                   |  8 +++---
 doc/source/conf.py                            |  4 +--
 pandas/_testing/contexts.py                   |  2 +-
 pandas/_version.py                            |  2 +-
 pandas/core/series.py                         |  2 +-
 pandas/io/clipboard/__init__.py               |  4 +--
 pandas/tests/frame/methods/test_to_csv.py     |  2 +-
 pandas/tests/io/excel/test_readers.py         |  2 +-
 pandas/tests/io/formats/style/test_html.py    |  2 +-
 pandas/tests/io/formats/test_to_csv.py        | 24 ++++++++---------
 pandas/tests/io/formats/test_to_latex.py      |  2 +-
 pandas/tests/io/json/test_pandas.py           |  2 +-
 .../tests/io/parser/common/test_chunksize.py  |  2 +-
 .../io/parser/common/test_file_buffer_url.py  |  6 ++---
 .../tests/io/parser/common/test_iterator.py   |  4 +--
 pandas/tests/io/parser/test_c_parser_only.py  |  6 ++---
 pandas/tests/io/parser/test_compression.py    |  4 +--
 pandas/tests/io/test_common.py                |  4 +--
 pandas/tests/io/test_compression.py           |  4 +--
 pandas/tests/io/test_gcs.py                   |  2 +-
 pandas/tests/io/test_html.py                  |  2 +-
 pandas/tests/io/xml/test_to_xml.py            |  6 ++---
 pandas/tests/io/xml/test_xml.py               | 26 +++++++++----------
 pandas/tests/io/xml/test_xml_dtypes.py        |  2 +-
 pandas/tests/series/methods/test_to_csv.py    |  4 +--
 pandas/tests/util/test_show_versions.py       |  4 +--
 scripts/generate_pxi.py                       |  4 +--
 scripts/generate_version.py                   |  2 +-
 scripts/pandas_errors_documented.py           |  2 +-
 scripts/sort_whatsnew_note.py                 |  4 +--
 .../test_validate_min_versions_in_sync.py     |  4 +--
 scripts/validate_docstrings.py                |  2 +-
 scripts/validate_exception_location.py        |  2 +-
 scripts/validate_min_versions_in_sync.py      |  4 +--
 setup.py                                      |  4 +--
 web/pandas_web.py                             | 14 +++++-----
 37 files changed, 88 insertions(+), 88 deletions(-)

diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
index 36301d22db5d3..856327dfe876f 100644
--- a/asv_bench/benchmarks/io/csv.py
+++ b/asv_bench/benchmarks/io/csv.py
@@ -444,7 +444,7 @@ class ReadCSVMemoryGrowth(BaseIO):
     param_names = ["engine"]
 
     def setup(self, engine):
-        with open(self.fname, "w") as f:
+        with open(self.fname, "w",encoding="utf-8") as f:
             for i in range(self.num_rows):
                 f.write(f"{i}\n")
 
diff --git a/doc/make.py b/doc/make.py
index f5bf170c6274d..78edbc997eb55 100755
--- a/doc/make.py
+++ b/doc/make.py
@@ -163,12 +163,12 @@ def _get_page_title(self, page):
             components=(docutils.parsers.rst.Parser,)
         )
         doc = docutils.utils.new_document("<doc>", option_parser.get_default_values())
-        with open(fname) as f:
+        with open(fname,encoding="utf-8") as f:
             data = f.read()
 
         parser = docutils.parsers.rst.Parser()
         # do not generate any warning when parsing the rst
-        with open(os.devnull, "a") as f:
+        with open(os.devnull, "a",encoding="utf-8") as f:
             doc.reporter.stream = f
             parser.parse(data, doc)
 
@@ -186,7 +186,7 @@ def _add_redirects(self):
         Create in the build directory an html file with a redirect,
         for every row in REDIRECTS_FILE.
         """
-        with open(REDIRECTS_FILE) as mapping_fd:
+        with open(REDIRECTS_FILE,encoding="utf-8") as mapping_fd:
             reader = csv.reader(mapping_fd)
             for row in reader:
                 if not row or row[0].strip().startswith("#"):
@@ -209,7 +209,7 @@ def _add_redirects(self):
                     # sphinx specific stuff
                     title = "this page"
 
-                with open(path, "w") as moved_page_fd:
+                with open(path, "w",encoding="utf-8") as moved_page_fd:
                     html = f"""\
 <html>
     <head>
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 0219c0e4f05ba..e5e764f4c7ef4 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -117,9 +117,9 @@
                 elif single_doc and rel_fname != pattern:
                     exclude_patterns.append(rel_fname)
 
-with open(os.path.join(source_path, "index.rst.template")) as f:
+with open(os.path.join(source_path, "index.rst.template"),encoding="utf-8") as f:
     t = jinja2.Template(f.read())
-with open(os.path.join(source_path, "index.rst"), "w") as f:
+with open(os.path.join(source_path, "index.rst"), "w",encoding="utf-8") as f:
     f.write(
         t.render(
             include_api=include_api,
diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py
index ab00c80886794..d36dedc3008ac 100644
--- a/pandas/_testing/contexts.py
+++ b/pandas/_testing/contexts.py
@@ -126,7 +126,7 @@ def ensure_clean(
     handle_or_str: str | IO = str(path)
     if return_filelike:
         kwargs.setdefault("mode", "w+b")
-        handle_or_str = open(path, **kwargs)
+        handle_or_str = open(path, **kwargs,encoding="utf-8")
 
     try:
         yield handle_or_str
diff --git a/pandas/_version.py b/pandas/_version.py
index 6705b8505f7e2..59215ff72e715 100644
--- a/pandas/_version.py
+++ b/pandas/_version.py
@@ -159,7 +159,7 @@ def git_get_keywords(versionfile_abs):
     # _version.py.
     keywords = {}
     try:
-        with open(versionfile_abs) as fobj:
+        with open(versionfile_abs,encoding="utf-8") as fobj:
             for line in fobj:
                 if line.strip().startswith("git_refnames ="):
                     mo = re.search(r'=\s*"(.*)"', line)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 2b71eb4a9480d..d7db059b7cba5 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1699,7 +1699,7 @@ def to_string(
             if hasattr(buf, "write"):
                 buf.write(result)
             else:
-                with open(buf, "w") as f:
+                with open(buf, "w",encoding="utf-8") as f:
                     f.write(result)
         return None
 
diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py
index e574ed2c8059a..d29c6c216256c 100644
--- a/pandas/io/clipboard/__init__.py
+++ b/pandas/io/clipboard/__init__.py
@@ -282,11 +282,11 @@ def copy_dev_clipboard(text):
                 stacklevel=find_stack_level(),
             )
 
-        with open("/dev/clipboard", "w") as fd:
+        with open("/dev/clipboard", "w",encoding="utf-8") as fd:
             fd.write(text)
 
     def paste_dev_clipboard() -> str:
-        with open("/dev/clipboard") as fd:
+        with open("/dev/clipboard",encoding="utf-8") as fd:
             content = fd.read()
         return content
 
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
index 639c6f9d73511..494bff217b60e 100644
--- a/pandas/tests/frame/methods/test_to_csv.py
+++ b/pandas/tests/frame/methods/test_to_csv.py
@@ -626,7 +626,7 @@ def test_to_csv_float32_nanrep(self):
         with tm.ensure_clean("__tmp_to_csv_float32_nanrep__.csv") as path:
             df.to_csv(path, na_rep=999)
 
-            with open(path) as f:
+            with open(path,encoding="utf-8") as f:
                 lines = f.readlines()
                 assert lines[1].split(",")[2] == "999"
 
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index 37ecce84e3caa..cf0df8e5c23e5 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -1702,7 +1702,7 @@ def test_corrupt_files_closed(self, engine, read_ext):
             errors = (BadZipFile, xlrd.biffh.XLRDError)
 
         with tm.ensure_clean(f"corrupt{read_ext}") as file:
-            Path(file).write_text("corrupt")
+            Path(file).write_text("corrupt",encoding="utf-8")
             with tm.assert_produces_warning(False):
                 try:
                     pd.ExcelFile(file, engine=engine)
diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py
index 67f7e12fcc3c2..d06e7ef254469 100644
--- a/pandas/tests/io/formats/style/test_html.py
+++ b/pandas/tests/io/formats/style/test_html.py
@@ -43,7 +43,7 @@ def tpl_table():
 def test_html_template_extends_options():
     # make sure if templates are edited tests are updated as are setup fixtures
     # to understand the dependency
-    with open("pandas/io/formats/templates/html.tpl") as file:
+    with open("pandas/io/formats/templates/html.tpl",encoding="utf-8") as file:
         result = file.read()
     assert "{% include html_style_tpl %}" in result
     assert "{% include html_table_tpl %}" in result
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 81dc79d3111b8..2c2ed7f8514c7 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -32,7 +32,7 @@ def test_to_csv_with_single_column(self):
 """
         with tm.ensure_clean("test.csv") as path:
             df1.to_csv(path, header=None, index=None)
-            with open(path) as f:
+            with open(path,encoding="utf-8") as f:
                 assert f.read() == expected1
 
         df2 = DataFrame([1, None])
@@ -42,7 +42,7 @@ def test_to_csv_with_single_column(self):
 """
         with tm.ensure_clean("test.csv") as path:
             df2.to_csv(path, header=None, index=None)
-            with open(path) as f:
+            with open(path,encoding="utf-8") as f:
                 assert f.read() == expected2
 
     def test_to_csv_default_encoding(self):
@@ -64,7 +64,7 @@ def test_to_csv_quotechar(self):
 
         with tm.ensure_clean("test.csv") as path:
             df.to_csv(path, quoting=1)  # 1=QUOTE_ALL
-            with open(path) as f:
+            with open(path,encoding="utf-8") as f:
                 assert f.read() == expected
 
         expected = """\
@@ -75,7 +75,7 @@ def test_to_csv_quotechar(self):
 
         with tm.ensure_clean("test.csv") as path:
             df.to_csv(path, quoting=1, quotechar="$")
-            with open(path) as f:
+            with open(path,encoding="utf-8") as f:
                 assert f.read() == expected
 
         with tm.ensure_clean("test.csv") as path:
@@ -92,7 +92,7 @@ def test_to_csv_doublequote(self):
 
         with tm.ensure_clean("test.csv") as path:
             df.to_csv(path, quoting=1, doublequote=True)  # QUOTE_ALL
-            with open(path) as f:
+            with open(path,encoding="utf-8") as f:
                 assert f.read() == expected
 
         with tm.ensure_clean("test.csv") as path:
@@ -109,7 +109,7 @@ def test_to_csv_escapechar(self):
 
         with tm.ensure_clean("test.csv") as path:  # QUOTE_ALL
             df.to_csv(path, quoting=1, doublequote=False, escapechar="\\")
-            with open(path) as f:
+            with open(path,encoding="utf-8") as f:
                 assert f.read() == expected
 
         df = DataFrame({"col": ["a,a", ",bb,"]})
@@ -121,7 +121,7 @@ def test_to_csv_escapechar(self):
 
         with tm.ensure_clean("test.csv") as path:
             df.to_csv(path, quoting=3, escapechar="\\")  # QUOTE_NONE
-            with open(path) as f:
+            with open(path,encoding="utf-8") as f:
                 assert f.read() == expected
 
     def test_csv_to_string(self):
@@ -401,7 +401,7 @@ def test_to_csv_string_array_ascii(self):
 """
         with tm.ensure_clean("str_test.csv") as path:
             df.to_csv(path, encoding="ascii")
-            with open(path) as f:
+            with open(path,encoding="utf-8") as f:
                 assert f.read() == expected_ascii
 
     def test_to_csv_string_array_utf8(self):
@@ -415,7 +415,7 @@ def test_to_csv_string_array_utf8(self):
 """
         with tm.ensure_clean("unicode_test.csv") as path:
             df.to_csv(path, encoding="utf-8")
-            with open(path) as f:
+            with open(path,encoding="utf-8") as f:
                 assert f.read() == expected_utf8
 
     def test_to_csv_string_with_lf(self):
@@ -521,10 +521,10 @@ def test_to_csv_write_to_open_file(self):
 z
 """
         with tm.ensure_clean("test.txt") as path:
-            with open(path, "w") as f:
+            with open(path, "w",encoding="utf-8") as f:
                 f.write("manual header\n")
                 df.to_csv(f, header=None, index=None)
-            with open(path) as f:
+            with open(path,encoding="utf-8") as f:
                 assert f.read() == expected
 
     def test_to_csv_write_to_open_file_with_newline_py3(self):
@@ -534,7 +534,7 @@ def test_to_csv_write_to_open_file_with_newline_py3(self):
         expected_rows = ["x", "y", "z"]
         expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows)
         with tm.ensure_clean("test.txt") as path:
-            with open(path, "w", newline="") as f:
+            with open(path, "w", newline="",encoding="utf-8") as f:
                 f.write("manual header\n")
                 df.to_csv(f, header=None, index=None)
 
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
index 64c064172a646..c46aa609922a3 100644
--- a/pandas/tests/io/formats/test_to_latex.py
+++ b/pandas/tests/io/formats/test_to_latex.py
@@ -34,7 +34,7 @@ class TestToLatex:
     def test_to_latex_to_file(self, float_frame):
         with tm.ensure_clean("test.tex") as path:
             float_frame.to_latex(path)
-            with open(path) as f:
+            with open(path,encoding="utf-8") as f:
                 assert float_frame.to_latex() == f.read()
 
     def test_to_latex_to_file_utf8_with_encoding(self):
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 5fc04509b86b6..377e1cc3d99ba 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1214,7 +1214,7 @@ def test_read_s3_jsonl(self, s3_resource, s3so):
     def test_read_local_jsonl(self):
         # GH17200
         with tm.ensure_clean("tmp_items.json") as path:
-            with open(path, "w") as infile:
+            with open(path, "w",encoding="utf-8") as infile:
                 infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
             result = read_json(path, lines=True)
             expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py
index c8cef56c73902..14327a98ec47e 100644
--- a/pandas/tests/io/parser/common/test_chunksize.py
+++ b/pandas/tests/io/parser/common/test_chunksize.py
@@ -228,7 +228,7 @@ def test_read_csv_memory_growth_chunksize(all_parsers):
     parser = all_parsers
 
     with tm.ensure_clean() as path:
-        with open(path, "w") as f:
+        with open(path, "w",encoding="utf-8") as f:
             for i in range(1000):
                 f.write(str(i) + "\n")
 
diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py
index c11a59a8b4660..ba41b46f37099 100644
--- a/pandas/tests/io/parser/common/test_file_buffer_url.py
+++ b/pandas/tests/io/parser/common/test_file_buffer_url.py
@@ -107,7 +107,7 @@ def test_no_permission(all_parsers):
 
         # verify that this process cannot open the file (not running as sudo)
         try:
-            with open(path):
+            with open(path,encoding="utf-8"):
                 pass
             pytest.skip("Running as sudo.")
         except PermissionError:
@@ -285,7 +285,7 @@ def test_file_handles_with_open(all_parsers, csv1):
     parser = all_parsers
 
     for mode in ["r", "rb"]:
-        with open(csv1, mode) as f:
+        with open(csv1, mode, encoding="utf-8" if mode == "r" else None) as f:
             parser.read_csv(f)
             assert not f.closed
 
@@ -392,7 +392,7 @@ def test_context_manageri_user_provided(all_parsers, datapath):
     # make sure that user-provided handles are not closed
     parser = all_parsers
 
-    with open(datapath("io", "data", "csv", "iris.csv")) as path:
+    with open(datapath("io", "data", "csv", "iris.csv"),encoding="utf-8") as path:
         reader = parser.read_csv(path, chunksize=1)
         assert not reader.handles.handle.closed
         try:
diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py
index 939ed0e73a5ee..f517c94998138 100644
--- a/pandas/tests/io/parser/common/test_iterator.py
+++ b/pandas/tests/io/parser/common/test_iterator.py
@@ -95,10 +95,10 @@ def test_iteration_open_handle(all_parsers):
     kwargs = {"header": None}
 
     with tm.ensure_clean() as path:
-        with open(path, "w") as f:
+        with open(path, "w",encoding="utf-8") as f:
             f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG")
 
-        with open(path) as f:
+        with open(path,encoding="utf-8") as f:
             for line in f:
                 if "CCC" in line:
                     break
diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
index c2a65704a845a..d45f47a805019 100644
--- a/pandas/tests/io/parser/test_c_parser_only.py
+++ b/pandas/tests/io/parser/test_c_parser_only.py
@@ -603,7 +603,7 @@ def test_file_handles_mmap(c_parser_only, csv1):
     # Don't close user provided file handles.
     parser = c_parser_only
 
-    with open(csv1) as f:
+    with open(csv1,encoding="utf-8") as f:
         with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m:
             parser.read_csv(m)
             assert not m.closed
@@ -615,7 +615,7 @@ def test_file_binary_mode(c_parser_only):
     expected = DataFrame([[1, 2, 3], [4, 5, 6]])
 
     with tm.ensure_clean() as path:
-        with open(path, "w") as f:
+        with open(path, "w",encoding="utf-8") as f:
             f.write("1,2,3\n4,5,6")
 
         with open(path, "rb") as f:
@@ -627,7 +627,7 @@ def test_unix_style_breaks(c_parser_only):
     # GH 11020
     parser = c_parser_only
     with tm.ensure_clean() as path:
-        with open(path, "w", newline="\n") as f:
+        with open(path, "w", newline="\n",encoding="utf-8") as f:
             f.write("blah\n\ncol_1,col_2,col_3\n\n")
         result = parser.read_csv(path, skiprows=2, encoding="utf-8", engine="c")
     expected = DataFrame(columns=["col_1", "col_2", "col_3"])
diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py
index ab00e31bd9b43..7ab9237637f84 100644
--- a/pandas/tests/io/parser/test_compression.py
+++ b/pandas/tests/io/parser/test_compression.py
@@ -129,7 +129,7 @@ def test_infer_compression(all_parsers, csv1, buffer, ext):
     kwargs["compression"] = "infer"
 
     if buffer:
-        with open(csv1) as f:
+        with open(csv1,encoding="utf-8") as f:
             result = parser.read_csv(f, **kwargs)
     else:
         ext = "." + ext if ext else ""
@@ -183,7 +183,7 @@ def test_ignore_compression_extension(all_parsers):
         with tm.ensure_clean("test.csv.zip") as path_zip:
             # make sure to create un-compressed file with zip extension
             df.to_csv(path_csv, index=False)
-            Path(path_zip).write_text(Path(path_csv).read_text())
+            Path(path_zip).write_text(Path(path_csv).read_text(encoding="utf-8"),encoding="utf-8")
 
             tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df)
 
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index b248c0c460c74..4e273edfeec69 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -411,7 +411,7 @@ def test_constructor_bad_file(self, mmap_file):
         with pytest.raises(err, match=msg):
             icom._maybe_memory_map(non_file, True)
 
-        with open(mmap_file) as target:
+        with open(mmap_file,encoding="utf-8") as target:
             pass
 
         msg = "I/O operation on closed file"
@@ -419,7 +419,7 @@ def test_constructor_bad_file(self, mmap_file):
             icom._maybe_memory_map(target, True)
 
     def test_next(self, mmap_file):
-        with open(mmap_file) as target:
+        with open(mmap_file,encoding="utf-8") as target:
             lines = target.readlines()
 
             with icom.get_handle(
diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py
index fc15ff3488ce9..da159566bd8be 100644
--- a/pandas/tests/io/test_compression.py
+++ b/pandas/tests/io/test_compression.py
@@ -299,10 +299,10 @@ def test_ambiguous_archive_zip():
 
 def test_ambiguous_archive_tar(tmp_path):
     csvAPath = tmp_path / "a.csv"
-    with open(csvAPath, "w") as a:
+    with open(csvAPath, "w",encoding="utf-8") as a:
         a.write("foo,bar\n")
     csvBPath = tmp_path / "b.csv"
-    with open(csvBPath, "w") as b:
+    with open(csvBPath, "w",encoding="utf-8") as b:
         b.write("foo,bar\n")
 
     tarpath = tmp_path / "archive.tar"
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index b65a19d766976..f1fea1732492c 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -193,7 +193,11 @@ class MockGCSFileSystem(AbstractFileSystem):
         def open(self, path, mode="r", *args):
             if "w" not in mode:
                 raise FileNotFoundError
-            return open(os.path.join(tmpdir, "test.parquet"), mode)
+            return open(
+                os.path.join(tmpdir, "test.parquet"),
+                mode,
+                encoding=None if "b" in mode else "utf-8",
+            )
 
     monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem)
     df1.to_parquet(
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index 4bd4e0cd7146f..bf82c14a88e8e 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -692,7 +692,7 @@ def try_remove_ws(x):
     @pytest.mark.slow
     def test_gold_canyon(self, banklist_data):
         gc = "Gold Canyon"
-        with open(banklist_data) as f:
+        with open(banklist_data,encoding="utf-8") as f:
             raw_text = f.read()
 
         assert gc in raw_text
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index 4843f40d6813d..b58064745ee19 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -983,7 +983,7 @@ def test_unknown_parser():
 def test_stylesheet_file_like(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
-    with open(xsl, mode) as f:
+    with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
         assert geom_df.to_xml(stylesheet=f) == xsl_expected
 
 
@@ -995,7 +995,7 @@ def test_stylesheet_io(datapath, mode):
     # consider using --check-untyped-defs
     xsl_obj: BytesIO | StringIO  # type: ignore[annotation-unchecked]
 
-    with open(xsl_path, mode) as f:
+    with open(xsl_path, mode, encoding="utf-8" if mode == "r" else None) as f:
         if mode == "rb":
             xsl_obj = BytesIO(f.read())
         else:
@@ -1010,7 +1010,7 @@ def test_stylesheet_io(datapath, mode):
 def test_stylesheet_buffered_reader(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
-    with open(xsl, mode) as f:
+    with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
         xsl_obj = f.read()
 
     output = geom_df.to_xml(stylesheet=xsl_obj)
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 071bc67d2dad9..2b3afc16b1ecd 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -259,7 +259,7 @@ def parser(request):
 
 def read_xml_iterparse(data, **kwargs):
     with tm.ensure_clean() as path:
-        with open(path, "w") as f:
+        with open(path, "w",encoding="utf-8") as f:
             f.write(data)
         return read_xml(path, **kwargs)
 
@@ -267,7 +267,7 @@ def read_xml_iterparse(data, **kwargs):
 def read_xml_iterparse_comp(comp_path, compression_only, **kwargs):
     with get_handle(comp_path, "r", compression=compression_only) as handles:
         with tm.ensure_clean() as path:
-            with open(path, "w") as f:
+            with open(path, "w",encoding="utf-8") as f:
                 f.write(handles.handle.read())
             return read_xml(path, **kwargs)
 
@@ -351,7 +351,7 @@ def test_parser_consistency_url(parser):
 
 def test_file_like(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode) as f:
+    with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
         df_file = read_xml(f, parser=parser)
 
     df_expected = DataFrame(
@@ -369,7 +369,7 @@ def test_file_like(datapath, parser, mode):
 
 def test_file_io(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode) as f:
+    with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
         xml_obj = f.read()
 
     df_io = read_xml(
@@ -392,7 +392,7 @@ def test_file_io(datapath, parser, mode):
 
 def test_file_buffered_reader_string(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode) as f:
+    with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
         xml_obj = f.read()
 
     df_str = read_xml(xml_obj, parser=parser)
@@ -412,7 +412,7 @@ def test_file_buffered_reader_string(datapath, parser, mode):
 
 def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode) as f:
+    with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
         next(f)
         xml_obj = f.read()
 
@@ -1154,7 +1154,7 @@ def test_stylesheet_file_like(datapath, mode):
     kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
     xsl = datapath("io", "data", "xml", "flatten_doc.xsl")
 
-    with open(xsl, mode) as f:
+    with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
         df_style = read_xml(
             kml,
             xpath=".//k:Placemark",
@@ -1174,7 +1174,7 @@ def test_stylesheet_io(datapath, mode):
     # consider using --check-untyped-defs
     xsl_obj: BytesIO | StringIO  # type: ignore[annotation-unchecked]
 
-    with open(xsl, mode) as f:
+    with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
         if mode == "rb":
             xsl_obj = BytesIO(f.read())
         else:
@@ -1195,7 +1195,7 @@ def test_stylesheet_buffered_reader(datapath, mode):
     kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
     xsl = datapath("io", "data", "xml", "flatten_doc.xsl")
 
-    with open(xsl, mode) as f:
+    with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
         xsl_obj = f.read()
 
     df_style = read_xml(
@@ -1355,7 +1355,7 @@ def test_stylesheet_file_close(datapath, mode):
     # consider using --check-untyped-defs
     xsl_obj: BytesIO | StringIO  # type: ignore[annotation-unchecked]
 
-    with open(xsl, mode) as f:
+    with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
         if mode == "rb":
             xsl_obj = BytesIO(f.read())
         else:
@@ -1407,7 +1407,7 @@ def test_string_error(parser):
 def test_file_like_iterparse(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
 
-    with open(filename, mode) as f:
+    with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
         if mode == "r" and parser == "lxml":
             with pytest.raises(
                 TypeError, match=("reading file objects must return bytes objects")
@@ -1444,7 +1444,7 @@ def test_file_io_iterparse(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
 
     funcIO = StringIO if mode == "r" else BytesIO
-    with open(filename, mode) as f:
+    with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
         with funcIO(f.read()) as b:
             if mode == "r" and parser == "lxml":
                 with pytest.raises(
@@ -1550,7 +1550,7 @@ def test_bad_xml(parser):
   </row>
 """
     with tm.ensure_clean(filename="bad.xml") as path:
-        with open(path, "w") as f:
+        with open(path, "w",encoding="utf-8") as f:
             f.write(bad_xml)
 
         with pytest.raises(
diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py
index 17d1e7e00653b..ba563d17f277f 100644
--- a/pandas/tests/io/xml/test_xml_dtypes.py
+++ b/pandas/tests/io/xml/test_xml_dtypes.py
@@ -29,7 +29,7 @@ def iterparse(request):
 
 def read_xml_iterparse(data, **kwargs):
     with tm.ensure_clean() as path:
-        with open(path, "w") as f:
+        with open(path, "w",encoding="utf-8") as f:
             f.write(data)
         return read_xml(path, **kwargs)
 
diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py
index 990c3698a5036..d5384de4079a7 100644
--- a/pandas/tests/series/methods/test_to_csv.py
+++ b/pandas/tests/series/methods/test_to_csv.py
@@ -52,7 +52,7 @@ def test_from_csv(self, datetime_series, string_series):
             series_h = self.read_csv(path, header=0)
             assert series_h.name == "series"
 
-            with open(path, "w") as outfile:
+            with open(path, "w",encoding="utf-8") as outfile:
                 outfile.write("1998-01-01|1.0\n1999-01-01|2.0")
 
             series = self.read_csv(path, sep="|", parse_dates=True)
@@ -69,7 +69,7 @@ def test_to_csv(self, datetime_series):
         with tm.ensure_clean() as path:
             datetime_series.to_csv(path, header=False)
 
-            with open(path, newline=None) as f:
+            with open(path, newline=None,encoding="utf-8") as f:
                 lines = f.readlines()
             assert lines[1] != "\n"
 
diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py
index 714588d179aef..856aa0062e43d 100644
--- a/pandas/tests/util/test_show_versions.py
+++ b/pandas/tests/util/test_show_versions.py
@@ -16,7 +16,7 @@ def test_show_versions(tmpdir):
 
     pd.show_versions(as_json=as_json)
 
-    with open(as_json) as fd:
+    with open(as_json,encoding="utf-8") as fd:
         # check if file output is valid JSON, will raise an exception if not
         result = json.load(fd)
 
@@ -75,7 +75,7 @@ def test_json_output_match(capsys, tmpdir):
 
     out_path = os.path.join(tmpdir, "test_json.json")
     pd.show_versions(as_json=out_path)
-    with open(out_path) as out_fd:
+    with open(out_path,encoding="utf-8") as out_fd:
         result_file = out_fd.read()
 
     assert result_console == result_file
diff --git a/scripts/generate_pxi.py b/scripts/generate_pxi.py
index 3462b97aefcbf..586b2d4fe3e35 100644
--- a/scripts/generate_pxi.py
+++ b/scripts/generate_pxi.py
@@ -5,11 +5,11 @@
 
 
 def process_tempita(pxifile, outfile):
-    with open(pxifile) as f:
+    with open(pxifile,encoding="utf-8") as f:
         tmpl = f.read()
     pyxcontent = Tempita.sub(tmpl)
 
-    with open(outfile, "w") as f:
+    with open(outfile, "w",encoding="utf-8") as f:
         f.write(pyxcontent)
 
 
diff --git a/scripts/generate_version.py b/scripts/generate_version.py
index fbc78ab12429a..3b778567e2335 100644
--- a/scripts/generate_version.py
+++ b/scripts/generate_version.py
@@ -8,7 +8,7 @@ def write_version_info(path):
     if os.environ.get("MESON_DIST_ROOT"):
         # raise ValueError("dist root is", os.environ.get("MESON_DIST_ROOT"))
         path = os.path.join(os.environ.get("MESON_DIST_ROOT"), path)
-    with open(path, "w") as file:
+    with open(path, "w",encoding="utf-8") as file:
         file.write(f'__version__="{versioneer.get_version()}"\n')
         file.write(
             f'__git_version__="{versioneer.get_versions()["full-revisionid"]}"\n'
diff --git a/scripts/pandas_errors_documented.py b/scripts/pandas_errors_documented.py
index 52c1e2008b8a0..f23d9bc979334 100644
--- a/scripts/pandas_errors_documented.py
+++ b/scripts/pandas_errors_documented.py
@@ -34,7 +34,7 @@ def main(argv: Sequence[str] | None = None) -> None:
     args = parser.parse_args(argv)
     with open(args.path, encoding="utf-8") as f:
         file_errors = get_defined_errors(f.read())
-    with open(API_PATH) as f:
+    with open(API_PATH,encoding="utf-8") as f:
         doc_errors = {
             line.split(".")[1].strip() for line in f.readlines() if "errors" in line
         }
diff --git a/scripts/sort_whatsnew_note.py b/scripts/sort_whatsnew_note.py
index ae1d3346a5827..b7a427329a2df 100644
--- a/scripts/sort_whatsnew_note.py
+++ b/scripts/sort_whatsnew_note.py
@@ -63,12 +63,12 @@ def main(argv: Sequence[str] | None = None) -> int:
     args = parser.parse_args(argv)
     ret = 0
     for path in args.paths:
-        with open(path) as fd:
+        with open(path,encoding="utf-8") as fd:
             content = fd.read()
         new_content = sort_whatsnew_note(content)
         if content != new_content:
             ret |= 1
-            with open(path, "w") as fd:
+            with open(path, "w",encoding="utf-8") as fd:
                 fd.write(new_content)
     return ret
 
diff --git a/scripts/tests/test_validate_min_versions_in_sync.py b/scripts/tests/test_validate_min_versions_in_sync.py
index 13e8965bb7591..d454bf7063c3c 100644
--- a/scripts/tests/test_validate_min_versions_in_sync.py
+++ b/scripts/tests/test_validate_min_versions_in_sync.py
@@ -49,13 +49,13 @@
 def test_pin_min_versions_to_yaml_file(src_toml, src_yaml, expected_yaml):
     with open(src_toml, "rb") as toml_f:
         toml_map = tomllib.load(toml_f)
-    with open(src_yaml) as yaml_f:
+    with open(src_yaml,encoding="utf-8") as yaml_f:
         yaml_file_data = yaml_f.read()
     yaml_file = yaml.safe_load(yaml_file_data)
     yaml_dependencies = yaml_file["dependencies"]
     yaml_map = get_yaml_map_from(yaml_dependencies)
     toml_map = get_toml_map_from(toml_map)
     result_yaml_file = pin_min_versions_to_yaml_file(yaml_map, toml_map, yaml_file_data)
-    with open(expected_yaml) as yaml_f:
+    with open(expected_yaml,encoding="utf-8") as yaml_f:
         dummy_yaml_expected_file_1 = yaml_f.read()
     assert result_yaml_file == dummy_yaml_expected_file_1
diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py
index 4c133483f571f..6b43506bd03f1 100755
--- a/scripts/validate_docstrings.py
+++ b/scripts/validate_docstrings.py
@@ -367,7 +367,7 @@ def get_all_api_items():
     base_path = pathlib.Path(__file__).parent.parent
     api_doc_fnames = pathlib.Path(base_path, "doc", "source", "reference")
     for api_doc_fname in api_doc_fnames.glob("*.rst"):
-        with open(api_doc_fname) as f:
+        with open(api_doc_fname,encoding="utf-8") as f:
             yield from get_api_items(f)
 
 
diff --git a/scripts/validate_exception_location.py b/scripts/validate_exception_location.py
index 7af5e749b4b96..82154e82786b9 100644
--- a/scripts/validate_exception_location.py
+++ b/scripts/validate_exception_location.py
@@ -36,7 +36,7 @@
 
 
 def get_warnings_and_exceptions_from_api_path() -> set[str]:
-    with open(API_PATH) as f:
+    with open(API_PATH,encoding="utf-8") as f:
         doc_errors = {
             line.split(".")[1].strip() for line in f.readlines() if "errors" in line
         }
diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py
index 9a6d97a222000..f4d51142876b9 100755
--- a/scripts/validate_min_versions_in_sync.py
+++ b/scripts/validate_min_versions_in_sync.py
@@ -63,7 +63,7 @@ def pin_min_versions_to_ci_deps() -> int:
         toml_dependencies = tomllib.load(toml_f)
     ret = 0
     for curr_file in all_yaml_files:
-        with open(curr_file) as yaml_f:
+        with open(curr_file,encoding="utf-8") as yaml_f:
             yaml_start_data = yaml_f.read()
         yaml_file = yaml.safe_load(yaml_start_data)
         yaml_dependencies = yaml_file["dependencies"]
@@ -73,7 +73,7 @@ def pin_min_versions_to_ci_deps() -> int:
             yaml_map, toml_map, yaml_start_data
         )
         if yaml_result_data != yaml_start_data:
-            with open(curr_file, "w") as f:
+            with open(curr_file, "w",encoding="utf-8") as f:
                 f.write(yaml_result_data)
             ret |= 1
     return ret
diff --git a/setup.py b/setup.py
index 49f6557e2e250..8fcd4804cf6e6 100755
--- a/setup.py
+++ b/setup.py
@@ -88,11 +88,11 @@ def render_templates(cls, pxifiles):
                 # if .pxi.in is not updated, no need to output .pxi
                 continue
 
-            with open(pxifile) as f:
+            with open(pxifile,encoding="utf-8") as f:
                 tmpl = f.read()
             pyxcontent = Tempita.sub(tmpl)
 
-            with open(outfile, "w") as f:
+            with open(outfile, "w",encoding="utf-8") as f:
                 f.write(pyxcontent)
 
     def build_extensions(self):
diff --git a/web/pandas_web.py b/web/pandas_web.py
index 5e902f1b1919b..75dde11c25138 100755
--- a/web/pandas_web.py
+++ b/web/pandas_web.py
@@ -110,7 +110,7 @@ def blog_add_posts(context):
                 md = markdown.Markdown(
                     extensions=context["main"]["markdown_extensions"]
                 )
-                with open(os.path.join(posts_path, fname)) as f:
+                with open(os.path.join(posts_path, fname),encoding="utf-8") as f:
                     html = md.convert(f.read())
                 title = md.Meta["title"][0]
                 summary = re.sub(tag_expr, "", html)
@@ -197,7 +197,7 @@ def maintainers_add_info(context):
 
         # save the data fetched from github to use it in case we exceed
         # git github api quota in the future
-        with open(pathlib.Path(context["target_path"]) / "maintainers.json", "w") as f:
+        with open(pathlib.Path(context["target_path"]) / "maintainers.json", "w",encoding="utf-8") as f:
             json.dump(maintainers_info, f)
 
         return context
@@ -220,7 +220,7 @@ def home_add_releases(context):
             resp.raise_for_status()
             releases = resp.json()
 
-        with open(pathlib.Path(context["target_path"]) / "releases.json", "w") as f:
+        with open(pathlib.Path(context["target_path"]) / "releases.json", "w",encoding="utf-8") as f:
             json.dump(releases, f, default=datetime.datetime.isoformat)
 
         for release in releases:
@@ -304,7 +304,7 @@ def roadmap_pdeps(context):
             resp.raise_for_status()
             pdeps = resp.json()
 
-        with open(pathlib.Path(context["target_path"]) / "pdeps.json", "w") as f:
+        with open(pathlib.Path(context["target_path"]) / "pdeps.json", "w",encoding="utf-8") as f:
             json.dump(pdeps, f)
 
         for pdep in sorted(pdeps["items"], key=operator.itemgetter("title")):
@@ -346,7 +346,7 @@ def get_context(config_fname: str, **kwargs):
     Load the config yaml as the base context, and enrich it with the
     information added by the context preprocessors defined in the file.
     """
-    with open(config_fname) as f:
+    with open(config_fname,encoding="utf-8") as f:
         context = yaml.safe_load(f)
 
     context["source_path"] = os.path.dirname(config_fname)
@@ -418,7 +418,7 @@ def main(
 
         extension = os.path.splitext(fname)[-1]
         if extension in (".html", ".md"):
-            with open(os.path.join(source_path, fname)) as f:
+            with open(os.path.join(source_path, fname),encoding="utf-8") as f:
                 content = f.read()
             if extension == ".md":
                 body = markdown.markdown(
@@ -431,7 +431,7 @@ def main(
             context["base_url"] = "".join(["../"] * os.path.normpath(fname).count("/"))
             content = jinja_env.from_string(content).render(**context)
             fname_html = os.path.splitext(fname)[0] + ".html"
-            with open(os.path.join(target_path, fname_html), "w") as f:
+            with open(os.path.join(target_path, fname_html), "w",encoding="utf-8") as f:
                 f.write(content)
         else:
             shutil.copy(

From e5282921d570dfa36d0431615d69e158aeeb1d72 Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sat, 29 Apr 2023 17:37:15 +0530
Subject: [PATCH 05/17] b.pre-commit removed

---
 b/.pre-commit-config.yaml | 432 --------------------------------------
 1 file changed, 432 deletions(-)
 delete mode 100644 b/.pre-commit-config.yaml

diff --git a/b/.pre-commit-config.yaml b/b/.pre-commit-config.yaml
deleted file mode 100644
index 34cd91940f014..0000000000000
--- a/b/.pre-commit-config.yaml
+++ /dev/null
@@ -1,432 +0,0 @@
-minimum_pre_commit_version: 2.15.0
-exclude: ^LICENSES/|\.(html|csv|svg)$
-# reserve "manual" for relatively slow hooks which we still want to run in CI
-default_stages: [
-    commit,
-    merge-commit,
-    push,
-    prepare-commit-msg,
-    commit-msg,
-    post-checkout,
-    post-commit,
-    post-merge,
-    post-rewrite
-]
-ci:
-    autofix_prs: false
-repos:
--   repo: local
-    hooks:
-    # NOTE: we make `black` a local hook because if it's installed from
-    # PyPI (rather than from source) then it'll run twice as fast thanks to mypyc
-    -   id: black
-        name: black
-        description: "Black: The uncompromising Python code formatter"
-        entry: black
-        language: python
-        require_serial: true
-        types_or: [python, pyi]
-        additional_dependencies: [black==23.1.0]
--   repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.259
-    hooks:
-    -   id: ruff
-        args: [--exit-non-zero-on-fix]
--   repo: https://github.com/jendrikseipp/vulture
-    rev: 'v2.7'
-    hooks:
-      - id: vulture
-        entry: python scripts/run_vulture.py
-        pass_filenames: true
-        require_serial: false
--   repo: https://github.com/codespell-project/codespell
-    rev: v2.2.2
-    hooks:
-    -   id: codespell
-        types_or: [python, rst, markdown, cython, c]
-        additional_dependencies: [tomli]
--   repo: https://github.com/MarcoGorelli/cython-lint
-    rev: v0.12.5
-    hooks:
-    -   id: cython-lint
-    -   id: double-quote-cython-strings
--   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
-    hooks:
-    -   id: debug-statements
-    -   id: end-of-file-fixer
-        exclude: \.txt$
-        stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg,
-                 post-checkout, post-commit, post-merge, post-rewrite]
-    -   id: trailing-whitespace
-        stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg,
-                 post-checkout, post-commit, post-merge, post-rewrite]
--   repo: https://github.com/cpplint/cpplint
-    rev: 1.6.1
-    hooks:
-    -   id: cpplint
-        # We don't lint all C files because we don't want to lint any that are built
-        # from Cython files nor do we want to lint C files that we didn't modify for
-        # this particular codebase (e.g. src/headers, src/klib). However,
-        # we can lint all header files since they aren't "generated" like C files are.
-        exclude: ^pandas/_libs/src/(klib|headers)/
-        args: [
-            --quiet,
-            '--extensions=c,h',
-            '--headers=h',
-            --recursive,
-            --linelength=88,
-            '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size'
-        ]
--   repo: https://github.com/pycqa/pylint
-    rev: v2.16.2
-    hooks:
-    -   id: pylint
-        stages: [manual]
-    -   id: pylint
-        alias: redefined-outer-name
-        name: Redefining name from outer scope
-        files: ^pandas/
-        exclude: |
-            (?x)
-            ^pandas/tests  # keep excluded
-            |/_testing/  # keep excluded
-            |^pandas/util/_test_decorators\.py  # keep excluded
-            |^pandas/_version\.py  # keep excluded
-            |^pandas/conftest\.py  # keep excluded
-        args: [--disable=all, --enable=redefined-outer-name]
-        stages: [manual]
-    -   id: pylint
-        alias: unspecified-encoding
-        name: Using open without explicitly specifying an encoding
-        args: [--disable=all, --enable=unspecified-encoding]
-        stages: [manual]
--   repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
-    hooks:
-    -   id: isort
--   repo: https://github.com/asottile/pyupgrade
-    rev: v3.3.1
-    hooks:
-    -   id: pyupgrade
-        args: [--py38-plus]
--   repo: https://github.com/pre-commit/pygrep-hooks
-    rev: v1.10.0
-    hooks:
-      - id: rst-backticks
-      - id: rst-directive-colons
-        types: [text]  # overwrite types: [rst]
-        types_or: [python, rst]
-      - id: rst-inline-touching-normal
-        types: [text]  # overwrite types: [rst]
-        types_or: [python, rst]
--   repo: https://github.com/sphinx-contrib/sphinx-lint
-    rev: v0.6.7
-    hooks:
-    - id: sphinx-lint
--   repo: local
-    hooks:
-    -   id: pyright
-        # note: assumes python env is setup and activated
-        name: pyright
-        entry: pyright
-        language: node
-        pass_filenames: false
-        types: [python]
-        stages: [manual]
-        additional_dependencies: &pyright_dependencies
-        - pyright@1.1.292
-    -   id: pyright_reportGeneralTypeIssues
-        # note: assumes python env is setup and activated
-        name: pyright reportGeneralTypeIssues
-        entry: pyright --skipunannotated -p pyright_reportGeneralTypeIssues.json --level warning
-        language: node
-        pass_filenames: false
-        types: [python]
-        stages: [manual]
-        additional_dependencies: *pyright_dependencies
-    -   id: mypy
-        # note: assumes python env is setup and activated
-        name: mypy
-        entry: mypy
-        language: system
-        pass_filenames: false
-        types: [python]
-        stages: [manual]
-    -   id: stubtest
-        # note: assumes python env is setup and activated
-        # note: requires pandas dev to be installed
-        name: mypy (stubtest)
-        entry: python
-        language: system
-        pass_filenames: false
-        types: [pyi]
-        args: [scripts/run_stubtest.py]
-        stages: [manual]
-    -   id: inconsistent-namespace-usage
-        name: 'Check for inconsistent use of pandas namespace'
-        entry: python scripts/check_for_inconsistent_pandas_namespace.py
-        exclude: ^pandas/core/interchange/
-        language: python
-        types: [python]
-    -   id: no-os-remove
-        name: Check code for instances of os.remove
-        entry: os\.remove
-        language: pygrep
-        types: [python]
-        files: ^pandas/tests/
-        exclude: |
-            (?x)^
-            pandas/tests/io/pytables/test_store\.py$
-    -   id: unwanted-patterns
-        name: Unwanted patterns
-        language: pygrep
-        entry: |
-            (?x)
-            # outdated annotation syntax, missing error codes
-            \#\ type:\ (?!ignore)
-            |\#\ type:\s?ignore(?!\[)
-
-            # foo._class__ instead of type(foo)
-            |\.__class__
-
-            # np.bool/np.object instead of np.bool_/np.object_
-            |np\.bool[^_8`]
-            |np\.object[^_8`]
-
-            # imports from collections.abc instead of `from collections import abc`
-            |from\ collections\.abc\ import
-
-            # Numpy
-            |from\ numpy\ import\ random
-            |from\ numpy\.random\ import
-
-            # Incorrect code-block / IPython directives
-            |\.\.\ code-block\ ::
-            |\.\.\ ipython\ ::
-            # directive should not have a space before ::
-            |\.\.\ \w+\ ::
-
-            # Check for deprecated messages without sphinx directive
-            |(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)
-
-            # {foo!r} instead of {repr(foo)}
-            |!r}
-
-            # builtin filter function
-            |(?<!def)[\(\s]filter\(
-
-            # exec
-            |[^a-zA-Z0-9_]exec\(
-        types_or: [python, cython, rst]
-        exclude: ^doc/source/development/code_style\.rst  # contains examples of patterns to avoid
-    -   id: cython-casting
-        name: Check Cython casting is `<type>obj`, not `<type> obj`
-        language: pygrep
-        entry: '[a-zA-Z0-9*]> '
-        files: (\.pyx|\.pxi.in)$
-    -   id: incorrect-backticks
-        name: Check for backticks incorrectly rendering because of missing spaces
-        language: pygrep
-        entry: '[a-zA-Z0-9]\`\`?[a-zA-Z0-9]'
-        types: [rst]
-        files: ^doc/source/
-    -   id: seed-check-asv
-        name: Check for unnecessary random seeds in asv benchmarks
-        language: pygrep
-        entry: 'np\.random\.seed'
-        files: ^asv_bench/benchmarks
-        exclude: ^asv_bench/benchmarks/pandas_vb_common\.py
-    -   id: np-testing-array-equal
-        name: Check for usage of numpy testing or array_equal
-        language: pygrep
-        entry: '(numpy|np)(\.testing|\.array_equal)'
-        files: ^pandas/tests/
-        types: [python]
-    -   id: invalid-ea-testing
-        name: Check for invalid EA testing
-        language: pygrep
-        entry: 'tm\.assert_(series|frame)_equal'
-        files: ^pandas/tests/extension/base
-        types: [python]
-        exclude: ^pandas/tests/extension/base/base\.py
-    -   id: unwanted-patterns-in-tests
-        name: Unwanted patterns in tests
-        language: pygrep
-        entry: |
-            (?x)
-            # pytest.xfail instead of pytest.mark.xfail
-            pytest\.xfail
-
-            # imports from pandas._testing instead of `import pandas._testing as tm`
-            |from\ pandas\._testing\ import
-            |from\ pandas\ import\ _testing\ as\ tm
-
-            # No direct imports from conftest
-            |conftest\ import
-            |import\ conftest
-
-            # pandas.testing instead of tm
-            |pd\.testing\.
-
-            # pd.api.types instead of from pandas.api.types import ...
-            |(pd|pandas)\.api\.types\.
-
-            # np.testing, np.array_equal
-            |(numpy|np)(\.testing|\.array_equal)
-
-            # unittest.mock (use pytest builtin monkeypatch fixture instead)
-            |(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch)
-
-            # pytest raises without context
-            |\s\ pytest.raises
-
-            # pytest.warns (use tm.assert_produces_warning instead)
-            |pytest\.warns
-        files: ^pandas/tests/
-        types_or: [python, cython, rst]
-    -   id: unwanted-patterns-in-ea-tests
-        name: Unwanted patterns in EA tests
-        language: pygrep
-        entry: |
-            (?x)
-            tm.assert_(series|frame)_equal
-        files: ^pandas/tests/extension/base/
-        exclude: ^pandas/tests/extension/base/base\.py$
-        types_or: [python, cython, rst]
-    -   id: unwanted-patterns-in-cython
-        name: Unwanted patterns in Cython code
-        language: pygrep
-        entry: |
-            (?x)
-            # `<type>obj` as opposed to `<type> obj`
-            [a-zA-Z0-9*]>[ ]
-        types: [cython]
-    -   id: pip-to-conda
-        name: Generate pip dependency from conda
-        language: python
-        entry: python scripts/generate_pip_deps_from_conda.py
-        files: ^(environment.yml|requirements-dev.txt)$
-        pass_filenames: false
-        additional_dependencies: [tomli, pyyaml]
-    -   id: title-capitalization
-        name: Validate correct capitalization among titles in documentation
-        entry: python scripts/validate_rst_title_capitalization.py
-        language: python
-        types: [rst]
-        files: ^doc/source/(development|reference)/
-    -   id: unwanted-patterns-bare-pytest-raises
-        name: Check for use of bare pytest raises
-        language: python
-        entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises"
-        types: [python]
-        files: ^pandas/tests/
-        exclude: ^pandas/tests/extension/
-    -   id: unwanted-patterns-private-function-across-module
-        name: Check for use of private functions across modules
-        language: python
-        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module"
-        types: [python]
-        exclude: ^(asv_bench|pandas/tests|doc)/
-    -   id: unwanted-patterns-private-import-across-module
-        name: Check for import of private attributes across modules
-        language: python
-        entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
-        types: [python]
-        exclude: |
-            (?x)
-            ^(asv_bench|pandas/tests|doc)/
-            |scripts/validate_min_versions_in_sync\.py$
-    -   id: unwanted-patterns-strings-with-misplaced-whitespace
-        name: Check for strings with misplaced spaces
-        language: python
-        entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
-        types_or: [python, cython]
-    -   id: use-pd_array-in-core
-        name: Import pandas.array as pd_array in core
-        language: python
-        entry: python scripts/use_pd_array_in_core.py
-        files: ^pandas/core/
-        exclude: ^pandas/core/api\.py$
-        types: [python]
-    -   id: use-io-common-urlopen
-        name: Use pandas.io.common.urlopen instead of urllib.request.urlopen
-        language: python
-        entry: python scripts/use_io_common_urlopen.py
-        files: ^pandas/
-        exclude: ^pandas/tests/
-        types: [python]
-    -   id: no-bool-in-core-generic
-        name: Use bool_t instead of bool in pandas/core/generic.py
-        entry: python scripts/no_bool_in_generic.py
-        language: python
-        files: ^pandas/core/generic\.py$
-    -   id: no-return-exception
-        name: Use raise instead of return for exceptions
-        language: pygrep
-        entry: 'return [A-Za-z]+(Error|Exit|Interrupt|Exception|Iteration)'
-        files: ^pandas/
-        types: [python]
-        exclude: ^pandas/tests/
-    -   id: pandas-errors-documented
-        name: Ensure pandas errors are documented in doc/source/reference/testing.rst
-        entry: python scripts/pandas_errors_documented.py
-        language: python
-        files: ^pandas/errors/__init__.py$
-    -   id: pg8000-not-installed-CI
-        name: Check for pg8000 not installed on CI for test_pg8000_sqlalchemy_passthrough_error
-        language: pygrep
-        entry: 'pg8000'
-        files: ^ci/deps
-        types: [yaml]
-    -   id: validate-min-versions-in-sync
-        name: Check minimum version of dependencies are aligned
-        entry: python -m scripts.validate_min_versions_in_sync
-        language: python
-        files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$
-        additional_dependencies: [tomli, pyyaml]
-        pass_filenames: false
-    -   id: validate-errors-locations
-        name: Validate errors locations
-        description: Validate errors are in appropriate locations.
-        entry: python scripts/validate_exception_location.py
-        language: python
-        files: ^pandas/
-        exclude: ^(pandas/_libs/|pandas/tests/|pandas/errors/__init__.py$|pandas/_version.py)
-        types: [python]
-    -   id: future-annotations
-        name: import annotations from __future__
-        entry: 'from __future__ import annotations'
-        language: pygrep
-        args: [--negate]
-        files: ^pandas/
-        types: [python]
-        exclude: |
-            (?x)
-            /(__init__\.py)|(api\.py)|(_version\.py)|(testing\.py)|(conftest\.py)$
-            |/tests/
-            |/_testing/
-    -   id: autotyping
-        name: autotyping
-        entry: python -m scripts.run_autotyping
-        types_or: [python, pyi]
-        files: ^pandas
-        exclude: ^(pandas/tests|pandas/_version.py|pandas/io/clipboard)
-        language: python
-        stages: [manual]
-        additional_dependencies:
-        - autotyping==23.3.0
-        - libcst==0.4.9
-    -   id: check-test-naming
-        name: check that test names start with 'test'
-        entry: python -m scripts.check_test_naming
-        types: [python]
-        files: ^pandas/tests
-        language: python
-    -   id: sort-whatsnew-items
-        name: sort whatsnew entries alphabetically
-        entry: python -m scripts.sort_whatsnew_note
-        types: [rst]
-        language: python
-        files: ^doc/source/whatsnew/v
-        exclude: ^doc/source/whatsnew/v(0|1|2\.0\.0)

From 3d72dc3c32b9b97e9e6d80efef8dd207b0e9df16 Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sat, 29 Apr 2023 17:47:06 +0530
Subject: [PATCH 06/17] Final Commit

---
 asv_bench/benchmarks/io/csv.py                |  2 +-
 doc/make.py                                   |  8 +++---
 doc/source/conf.py                            |  4 +--
 pandas/_testing/contexts.py                   |  2 +-
 pandas/_version.py                            |  2 +-
 pandas/core/series.py                         |  2 +-
 pandas/io/clipboard/__init__.py               |  4 +--
 pandas/tests/frame/methods/test_to_csv.py     |  2 +-
 pandas/tests/io/excel/test_readers.py         |  2 +-
 pandas/tests/io/formats/style/test_html.py    |  2 +-
 pandas/tests/io/formats/test_to_csv.py        | 24 ++++++++---------
 pandas/tests/io/formats/test_to_latex.py      |  2 +-
 pandas/tests/io/json/test_pandas.py           |  2 +-
 .../tests/io/parser/common/test_chunksize.py  |  2 +-
 .../io/parser/common/test_file_buffer_url.py  |  6 ++---
 .../tests/io/parser/common/test_iterator.py   |  4 +--
 pandas/tests/io/parser/test_c_parser_only.py  |  6 ++---
 pandas/tests/io/parser/test_compression.py    |  6 +++--
 pandas/tests/io/test_common.py                |  4 +--
 pandas/tests/io/test_compression.py           |  4 +--
 pandas/tests/io/test_gcs.py                   |  2 +-
 pandas/tests/io/test_html.py                  |  2 +-
 pandas/tests/io/xml/test_to_xml.py            |  6 ++---
 pandas/tests/io/xml/test_xml.py               | 26 +++++++++----------
 pandas/tests/io/xml/test_xml_dtypes.py        |  2 +-
 pandas/tests/series/methods/test_to_csv.py    |  4 +--
 pandas/tests/util/test_show_versions.py       |  4 +--
 scripts/generate_pxi.py                       |  4 +--
 scripts/generate_version.py                   |  2 +-
 scripts/pandas_errors_documented.py           |  2 +-
 scripts/sort_whatsnew_note.py                 |  4 +--
 .../test_validate_min_versions_in_sync.py     |  4 +--
 scripts/validate_docstrings.py                |  2 +-
 scripts/validate_exception_location.py        |  2 +-
 scripts/validate_min_versions_in_sync.py      |  4 +--
 setup.py                                      |  4 +--
 web/pandas_web.py                             | 26 ++++++++++++++-----
 37 files changed, 102 insertions(+), 88 deletions(-)

diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
index 856327dfe876f..07d536d827959 100644
--- a/asv_bench/benchmarks/io/csv.py
+++ b/asv_bench/benchmarks/io/csv.py
@@ -444,7 +444,7 @@ class ReadCSVMemoryGrowth(BaseIO):
     param_names = ["engine"]
 
     def setup(self, engine):
-        with open(self.fname, "w",encoding="utf-8") as f:
+        with open(self.fname, "w", encoding="utf-8") as f:
             for i in range(self.num_rows):
                 f.write(f"{i}\n")
 
diff --git a/doc/make.py b/doc/make.py
index 78edbc997eb55..ed13ed87bcdbb 100755
--- a/doc/make.py
+++ b/doc/make.py
@@ -163,12 +163,12 @@ def _get_page_title(self, page):
             components=(docutils.parsers.rst.Parser,)
         )
         doc = docutils.utils.new_document("<doc>", option_parser.get_default_values())
-        with open(fname,encoding="utf-8") as f:
+        with open(fname, encoding="utf-8") as f:
             data = f.read()
 
         parser = docutils.parsers.rst.Parser()
         # do not generate any warning when parsing the rst
-        with open(os.devnull, "a",encoding="utf-8") as f:
+        with open(os.devnull, "a", encoding="utf-8") as f:
             doc.reporter.stream = f
             parser.parse(data, doc)
 
@@ -186,7 +186,7 @@ def _add_redirects(self):
         Create in the build directory an html file with a redirect,
         for every row in REDIRECTS_FILE.
         """
-        with open(REDIRECTS_FILE,encoding="utf-8") as mapping_fd:
+        with open(REDIRECTS_FILE, encoding="utf-8") as mapping_fd:
             reader = csv.reader(mapping_fd)
             for row in reader:
                 if not row or row[0].strip().startswith("#"):
@@ -209,7 +209,7 @@ def _add_redirects(self):
                     # sphinx specific stuff
                     title = "this page"
 
-                with open(path, "w",encoding="utf-8") as moved_page_fd:
+                with open(path, "w", encoding="utf-8") as moved_page_fd:
                     html = f"""\
 <html>
     <head>
diff --git a/doc/source/conf.py b/doc/source/conf.py
index e5e764f4c7ef4..6c0b1c21b8778 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -117,9 +117,9 @@
                 elif single_doc and rel_fname != pattern:
                     exclude_patterns.append(rel_fname)
 
-with open(os.path.join(source_path, "index.rst.template"),encoding="utf-8") as f:
+with open(os.path.join(source_path, "index.rst.template"), encoding="utf-8") as f:
     t = jinja2.Template(f.read())
-with open(os.path.join(source_path, "index.rst"), "w",encoding="utf-8") as f:
+with open(os.path.join(source_path, "index.rst"), "w", encoding="utf-8") as f:
     f.write(
         t.render(
             include_api=include_api,
diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py
index d36dedc3008ac..f11041d477701 100644
--- a/pandas/_testing/contexts.py
+++ b/pandas/_testing/contexts.py
@@ -126,7 +126,7 @@ def ensure_clean(
     handle_or_str: str | IO = str(path)
     if return_filelike:
         kwargs.setdefault("mode", "w+b")
-        handle_or_str = open(path, **kwargs,encoding="utf-8")
+        handle_or_str = open(path, **kwargs, encoding="utf-8")
 
     try:
         yield handle_or_str
diff --git a/pandas/_version.py b/pandas/_version.py
index 59215ff72e715..8c655648377c7 100644
--- a/pandas/_version.py
+++ b/pandas/_version.py
@@ -159,7 +159,7 @@ def git_get_keywords(versionfile_abs):
     # _version.py.
     keywords = {}
     try:
-        with open(versionfile_abs,encoding="utf-8") as fobj:
+        with open(versionfile_abs, encoding="utf-8") as fobj:
             for line in fobj:
                 if line.strip().startswith("git_refnames ="):
                     mo = re.search(r'=\s*"(.*)"', line)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index d7db059b7cba5..7053b88e0da2b 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1699,7 +1699,7 @@ def to_string(
             if hasattr(buf, "write"):
                 buf.write(result)
             else:
-                with open(buf, "w",encoding="utf-8") as f:
+                with open(buf, "w", encoding="utf-8") as f:
                     f.write(result)
         return None
 
diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py
index d29c6c216256c..c07f51d875d4d 100644
--- a/pandas/io/clipboard/__init__.py
+++ b/pandas/io/clipboard/__init__.py
@@ -282,11 +282,11 @@ def copy_dev_clipboard(text):
                 stacklevel=find_stack_level(),
             )
 
-        with open("/dev/clipboard", "w",encoding="utf-8") as fd:
+        with open("/dev/clipboard", "w", encoding="utf-8") as fd:
             fd.write(text)
 
     def paste_dev_clipboard() -> str:
-        with open("/dev/clipboard",encoding="utf-8") as fd:
+        with open("/dev/clipboard", encoding="utf-8") as fd:
             content = fd.read()
         return content
 
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
index 494bff217b60e..b44b05f9f8153 100644
--- a/pandas/tests/frame/methods/test_to_csv.py
+++ b/pandas/tests/frame/methods/test_to_csv.py
@@ -626,7 +626,7 @@ def test_to_csv_float32_nanrep(self):
         with tm.ensure_clean("__tmp_to_csv_float32_nanrep__.csv") as path:
             df.to_csv(path, na_rep=999)
 
-            with open(path,encoding="utf-8") as f:
+            with open(path, encoding="utf-8") as f:
                 lines = f.readlines()
                 assert lines[1].split(",")[2] == "999"
 
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index cf0df8e5c23e5..b8fdbce0f5c23 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -1702,7 +1702,7 @@ def test_corrupt_files_closed(self, engine, read_ext):
             errors = (BadZipFile, xlrd.biffh.XLRDError)
 
         with tm.ensure_clean(f"corrupt{read_ext}") as file:
-            Path(file).write_text("corrupt",encoding="utf-8")
+            Path(file).write_text("corrupt", encoding="utf-8")
             with tm.assert_produces_warning(False):
                 try:
                     pd.ExcelFile(file, engine=engine)
diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py
index d06e7ef254469..7b0617fbd829f 100644
--- a/pandas/tests/io/formats/style/test_html.py
+++ b/pandas/tests/io/formats/style/test_html.py
@@ -43,7 +43,7 @@ def tpl_table():
 def test_html_template_extends_options():
     # make sure if templates are edited tests are updated as are setup fixtures
     # to understand the dependency
-    with open("pandas/io/formats/templates/html.tpl",encoding="utf-8") as file:
+    with open("pandas/io/formats/templates/html.tpl", encoding="utf-8") as file:
         result = file.read()
     assert "{% include html_style_tpl %}" in result
     assert "{% include html_table_tpl %}" in result
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 2c2ed7f8514c7..a208daaf9f77b 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -32,7 +32,7 @@ def test_to_csv_with_single_column(self):
 """
         with tm.ensure_clean("test.csv") as path:
             df1.to_csv(path, header=None, index=None)
-            with open(path,encoding="utf-8") as f:
+            with open(path, encoding="utf-8") as f:
                 assert f.read() == expected1
 
         df2 = DataFrame([1, None])
@@ -42,7 +42,7 @@ def test_to_csv_with_single_column(self):
 """
         with tm.ensure_clean("test.csv") as path:
             df2.to_csv(path, header=None, index=None)
-            with open(path,encoding="utf-8") as f:
+            with open(path, encoding="utf-8") as f:
                 assert f.read() == expected2
 
     def test_to_csv_default_encoding(self):
@@ -64,7 +64,7 @@ def test_to_csv_quotechar(self):
 
         with tm.ensure_clean("test.csv") as path:
             df.to_csv(path, quoting=1)  # 1=QUOTE_ALL
-            with open(path,encoding="utf-8") as f:
+            with open(path, encoding="utf-8") as f:
                 assert f.read() == expected
 
         expected = """\
@@ -75,7 +75,7 @@ def test_to_csv_quotechar(self):
 
         with tm.ensure_clean("test.csv") as path:
             df.to_csv(path, quoting=1, quotechar="$")
-            with open(path,encoding="utf-8") as f:
+            with open(path, encoding="utf-8") as f:
                 assert f.read() == expected
 
         with tm.ensure_clean("test.csv") as path:
@@ -92,7 +92,7 @@ def test_to_csv_doublequote(self):
 
         with tm.ensure_clean("test.csv") as path:
             df.to_csv(path, quoting=1, doublequote=True)  # QUOTE_ALL
-            with open(path,encoding="utf-8") as f:
+            with open(path, encoding="utf-8") as f:
                 assert f.read() == expected
 
         with tm.ensure_clean("test.csv") as path:
@@ -109,7 +109,7 @@ def test_to_csv_escapechar(self):
 
         with tm.ensure_clean("test.csv") as path:  # QUOTE_ALL
             df.to_csv(path, quoting=1, doublequote=False, escapechar="\\")
-            with open(path,encoding="utf-8") as f:
+            with open(path, encoding="utf-8") as f:
                 assert f.read() == expected
 
         df = DataFrame({"col": ["a,a", ",bb,"]})
@@ -121,7 +121,7 @@ def test_to_csv_escapechar(self):
 
         with tm.ensure_clean("test.csv") as path:
             df.to_csv(path, quoting=3, escapechar="\\")  # QUOTE_NONE
-            with open(path,encoding="utf-8") as f:
+            with open(path, encoding="utf-8") as f:
                 assert f.read() == expected
 
     def test_csv_to_string(self):
@@ -401,7 +401,7 @@ def test_to_csv_string_array_ascii(self):
 """
         with tm.ensure_clean("str_test.csv") as path:
             df.to_csv(path, encoding="ascii")
-            with open(path,encoding="utf-8") as f:
+            with open(path, encoding="utf-8") as f:
                 assert f.read() == expected_ascii
 
     def test_to_csv_string_array_utf8(self):
@@ -415,7 +415,7 @@ def test_to_csv_string_array_utf8(self):
 """
         with tm.ensure_clean("unicode_test.csv") as path:
             df.to_csv(path, encoding="utf-8")
-            with open(path,encoding="utf-8") as f:
+            with open(path, encoding="utf-8") as f:
                 assert f.read() == expected_utf8
 
     def test_to_csv_string_with_lf(self):
@@ -521,10 +521,10 @@ def test_to_csv_write_to_open_file(self):
 z
 """
         with tm.ensure_clean("test.txt") as path:
-            with open(path, "w",encoding="utf-8") as f:
+            with open(path, "w", encoding="utf-8") as f:
                 f.write("manual header\n")
                 df.to_csv(f, header=None, index=None)
-            with open(path,encoding="utf-8") as f:
+            with open(path, encoding="utf-8") as f:
                 assert f.read() == expected
 
     def test_to_csv_write_to_open_file_with_newline_py3(self):
@@ -534,7 +534,7 @@ def test_to_csv_write_to_open_file_with_newline_py3(self):
         expected_rows = ["x", "y", "z"]
         expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows)
         with tm.ensure_clean("test.txt") as path:
-            with open(path, "w", newline="",encoding="utf-8") as f:
+            with open(path, "w", newline="", encoding="utf-8") as f:
                 f.write("manual header\n")
                 df.to_csv(f, header=None, index=None)
 
diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py
index c46aa609922a3..f127dc1dfc74b 100644
--- a/pandas/tests/io/formats/test_to_latex.py
+++ b/pandas/tests/io/formats/test_to_latex.py
@@ -34,7 +34,7 @@ class TestToLatex:
     def test_to_latex_to_file(self, float_frame):
         with tm.ensure_clean("test.tex") as path:
             float_frame.to_latex(path)
-            with open(path,encoding="utf-8") as f:
+            with open(path, encoding="utf-8") as f:
                 assert float_frame.to_latex() == f.read()
 
     def test_to_latex_to_file_utf8_with_encoding(self):
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 377e1cc3d99ba..788a6e97e3d0f 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1214,7 +1214,7 @@ def test_read_s3_jsonl(self, s3_resource, s3so):
     def test_read_local_jsonl(self):
         # GH17200
         with tm.ensure_clean("tmp_items.json") as path:
-            with open(path, "w",encoding="utf-8") as infile:
+            with open(path, "w", encoding="utf-8") as infile:
                 infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n')
             result = read_json(path, lines=True)
             expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py
index 14327a98ec47e..6be7269cb8433 100644
--- a/pandas/tests/io/parser/common/test_chunksize.py
+++ b/pandas/tests/io/parser/common/test_chunksize.py
@@ -228,7 +228,7 @@ def test_read_csv_memory_growth_chunksize(all_parsers):
     parser = all_parsers
 
     with tm.ensure_clean() as path:
-        with open(path, "w",encoding="utf-8") as f:
+        with open(path, "w", encoding="utf-8") as f:
             for i in range(1000):
                 f.write(str(i) + "\n")
 
diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py
index ba41b46f37099..1f3ee18541f4d 100644
--- a/pandas/tests/io/parser/common/test_file_buffer_url.py
+++ b/pandas/tests/io/parser/common/test_file_buffer_url.py
@@ -107,7 +107,7 @@ def test_no_permission(all_parsers):
 
         # verify that this process cannot open the file (not running as sudo)
         try:
-            with open(path,encoding="utf-8"):
+            with open(path, encoding="utf-8"):
                 pass
             pytest.skip("Running as sudo.")
         except PermissionError:
@@ -285,7 +285,7 @@ def test_file_handles_with_open(all_parsers, csv1):
     parser = all_parsers
 
     for mode in ["r", "rb"]:
-        with open(csv1, mode,encoding="utf-8") as f:
+        with open(csv1, mode, encoding="utf-8") as f:
             parser.read_csv(f)
             assert not f.closed
 
@@ -392,7 +392,7 @@ def test_context_manageri_user_provided(all_parsers, datapath):
     # make sure that user-provided handles are not closed
     parser = all_parsers
 
-    with open(datapath("io", "data", "csv", "iris.csv"),encoding="utf-8") as path:
+    with open(datapath("io", "data", "csv", "iris.csv"), encoding="utf-8") as path:
         reader = parser.read_csv(path, chunksize=1)
         assert not reader.handles.handle.closed
         try:
diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py
index f517c94998138..58e5886aedd6b 100644
--- a/pandas/tests/io/parser/common/test_iterator.py
+++ b/pandas/tests/io/parser/common/test_iterator.py
@@ -95,10 +95,10 @@ def test_iteration_open_handle(all_parsers):
     kwargs = {"header": None}
 
     with tm.ensure_clean() as path:
-        with open(path, "w",encoding="utf-8") as f:
+        with open(path, "w", encoding="utf-8") as f:
             f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG")
 
-        with open(path,encoding="utf-8") as f:
+        with open(path, encoding="utf-8") as f:
             for line in f:
                 if "CCC" in line:
                     break
diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
index d45f47a805019..425f5cfbcf392 100644
--- a/pandas/tests/io/parser/test_c_parser_only.py
+++ b/pandas/tests/io/parser/test_c_parser_only.py
@@ -603,7 +603,7 @@ def test_file_handles_mmap(c_parser_only, csv1):
     # Don't close user provided file handles.
     parser = c_parser_only
 
-    with open(csv1,encoding="utf-8") as f:
+    with open(csv1, encoding="utf-8") as f:
         with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m:
             parser.read_csv(m)
             assert not m.closed
@@ -615,7 +615,7 @@ def test_file_binary_mode(c_parser_only):
     expected = DataFrame([[1, 2, 3], [4, 5, 6]])
 
     with tm.ensure_clean() as path:
-        with open(path, "w",encoding="utf-8") as f:
+        with open(path, "w", encoding="utf-8") as f:
             f.write("1,2,3\n4,5,6")
 
         with open(path, "rb") as f:
@@ -627,7 +627,7 @@ def test_unix_style_breaks(c_parser_only):
     # GH 11020
     parser = c_parser_only
     with tm.ensure_clean() as path:
-        with open(path, "w", newline="\n",encoding="utf-8") as f:
+        with open(path, "w", newline="\n", encoding="utf-8") as f:
             f.write("blah\n\ncol_1,col_2,col_3\n\n")
         result = parser.read_csv(path, skiprows=2, encoding="utf-8", engine="c")
     expected = DataFrame(columns=["col_1", "col_2", "col_3"])
diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py
index 7ab9237637f84..bcba9c4a1823d 100644
--- a/pandas/tests/io/parser/test_compression.py
+++ b/pandas/tests/io/parser/test_compression.py
@@ -129,7 +129,7 @@ def test_infer_compression(all_parsers, csv1, buffer, ext):
     kwargs["compression"] = "infer"
 
     if buffer:
-        with open(csv1,encoding="utf-8") as f:
+        with open(csv1, encoding="utf-8") as f:
             result = parser.read_csv(f, **kwargs)
     else:
         ext = "." + ext if ext else ""
@@ -183,7 +183,9 @@ def test_ignore_compression_extension(all_parsers):
         with tm.ensure_clean("test.csv.zip") as path_zip:
             # make sure to create un-compressed file with zip extension
             df.to_csv(path_csv, index=False)
-            Path(path_zip).write_text(Path(path_csv).read_text(encoding="utf-8"),encoding="utf-8")
+            Path(path_zip).write_text(
+                Path(path_csv).read_text(encoding="utf-8"), encoding="utf-8"
+            )
 
             tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df)
 
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index 4e273edfeec69..435b9bdade944 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -411,7 +411,7 @@ def test_constructor_bad_file(self, mmap_file):
         with pytest.raises(err, match=msg):
             icom._maybe_memory_map(non_file, True)
 
-        with open(mmap_file,encoding="utf-8") as target:
+        with open(mmap_file, encoding="utf-8") as target:
             pass
 
         msg = "I/O operation on closed file"
@@ -419,7 +419,7 @@ def test_constructor_bad_file(self, mmap_file):
             icom._maybe_memory_map(target, True)
 
     def test_next(self, mmap_file):
-        with open(mmap_file,encoding="utf-8") as target:
+        with open(mmap_file, encoding="utf-8") as target:
             lines = target.readlines()
 
             with icom.get_handle(
diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py
index da159566bd8be..eadf35aedd708 100644
--- a/pandas/tests/io/test_compression.py
+++ b/pandas/tests/io/test_compression.py
@@ -299,10 +299,10 @@ def test_ambiguous_archive_zip():
 
 def test_ambiguous_archive_tar(tmp_path):
     csvAPath = tmp_path / "a.csv"
-    with open(csvAPath, "w",encoding="utf-8") as a:
+    with open(csvAPath, "w", encoding="utf-8") as a:
         a.write("foo,bar\n")
     csvBPath = tmp_path / "b.csv"
-    with open(csvBPath, "w",encoding="utf-8") as b:
+    with open(csvBPath, "w", encoding="utf-8") as b:
         b.write("foo,bar\n")
 
     tarpath = tmp_path / "archive.tar"
diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py
index f1fea1732492c..18cc0f0b11dc9 100644
--- a/pandas/tests/io/test_gcs.py
+++ b/pandas/tests/io/test_gcs.py
@@ -193,7 +193,7 @@ class MockGCSFileSystem(AbstractFileSystem):
         def open(self, path, mode="r", *args):
             if "w" not in mode:
                 raise FileNotFoundError
-            return open(os.path.join(tmpdir, "test.parquet"), mode,encoding="utf-8")
+            return open(os.path.join(tmpdir, "test.parquet"), mode, encoding="utf-8")
 
     monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem)
     df1.to_parquet(
diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py
index bf82c14a88e8e..256fb61412fc8 100644
--- a/pandas/tests/io/test_html.py
+++ b/pandas/tests/io/test_html.py
@@ -692,7 +692,7 @@ def try_remove_ws(x):
     @pytest.mark.slow
     def test_gold_canyon(self, banklist_data):
         gc = "Gold Canyon"
-        with open(banklist_data,encoding="utf-8") as f:
+        with open(banklist_data, encoding="utf-8") as f:
             raw_text = f.read()
 
         assert gc in raw_text
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index b58064745ee19..cb8ef8d2833d1 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -983,7 +983,7 @@ def test_unknown_parser():
 def test_stylesheet_file_like(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
-    with open(xsl, mode,encoding="utf-8") as f:
+    with open(xsl, mode, encoding="utf-8") as f:
         assert geom_df.to_xml(stylesheet=f) == xsl_expected
 
 
@@ -995,7 +995,7 @@ def test_stylesheet_io(datapath, mode):
     # consider using --check-untyped-defs
     xsl_obj: BytesIO | StringIO  # type: ignore[annotation-unchecked]
 
-    with open(xsl_path, mode,encoding="utf-8") as f:
+    with open(xsl_path, mode, encoding="utf-8") as f:
         if mode == "rb":
             xsl_obj = BytesIO(f.read())
         else:
@@ -1010,7 +1010,7 @@ def test_stylesheet_io(datapath, mode):
 def test_stylesheet_buffered_reader(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
-    with open(xsl, mode,encoding="utf-8") as f:
+    with open(xsl, mode, encoding="utf-8") as f:
         xsl_obj = f.read()
 
     output = geom_df.to_xml(stylesheet=xsl_obj)
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 2b3afc16b1ecd..08caa3307e9cf 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -259,7 +259,7 @@ def parser(request):
 
 def read_xml_iterparse(data, **kwargs):
     with tm.ensure_clean() as path:
-        with open(path, "w",encoding="utf-8") as f:
+        with open(path, "w", encoding="utf-8") as f:
             f.write(data)
         return read_xml(path, **kwargs)
 
@@ -267,7 +267,7 @@ def read_xml_iterparse(data, **kwargs):
 def read_xml_iterparse_comp(comp_path, compression_only, **kwargs):
     with get_handle(comp_path, "r", compression=compression_only) as handles:
         with tm.ensure_clean() as path:
-            with open(path, "w",encoding="utf-8") as f:
+            with open(path, "w", encoding="utf-8") as f:
                 f.write(handles.handle.read())
             return read_xml(path, **kwargs)
 
@@ -351,7 +351,7 @@ def test_parser_consistency_url(parser):
 
 def test_file_like(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode,encoding="utf-8") as f:
+    with open(filename, mode, encoding="utf-8") as f:
         df_file = read_xml(f, parser=parser)
 
     df_expected = DataFrame(
@@ -369,7 +369,7 @@ def test_file_like(datapath, parser, mode):
 
 def test_file_io(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode,encoding="utf-8") as f:
+    with open(filename, mode, encoding="utf-8") as f:
         xml_obj = f.read()
 
     df_io = read_xml(
@@ -392,7 +392,7 @@ def test_file_io(datapath, parser, mode):
 
 def test_file_buffered_reader_string(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode,encoding="utf-8") as f:
+    with open(filename, mode, encoding="utf-8") as f:
         xml_obj = f.read()
 
     df_str = read_xml(xml_obj, parser=parser)
@@ -412,7 +412,7 @@ def test_file_buffered_reader_string(datapath, parser, mode):
 
 def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode,encoding="utf-8") as f:
+    with open(filename, mode, encoding="utf-8") as f:
         next(f)
         xml_obj = f.read()
 
@@ -1154,7 +1154,7 @@ def test_stylesheet_file_like(datapath, mode):
     kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
     xsl = datapath("io", "data", "xml", "flatten_doc.xsl")
 
-    with open(xsl, mode,encoding="utf-8") as f:
+    with open(xsl, mode, encoding="utf-8") as f:
         df_style = read_xml(
             kml,
             xpath=".//k:Placemark",
@@ -1174,7 +1174,7 @@ def test_stylesheet_io(datapath, mode):
     # consider using --check-untyped-defs
     xsl_obj: BytesIO | StringIO  # type: ignore[annotation-unchecked]
 
-    with open(xsl, mode,encoding="utf-8") as f:
+    with open(xsl, mode, encoding="utf-8") as f:
         if mode == "rb":
             xsl_obj = BytesIO(f.read())
         else:
@@ -1195,7 +1195,7 @@ def test_stylesheet_buffered_reader(datapath, mode):
     kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
     xsl = datapath("io", "data", "xml", "flatten_doc.xsl")
 
-    with open(xsl, mode,encoding="utf-8") as f:
+    with open(xsl, mode, encoding="utf-8") as f:
         xsl_obj = f.read()
 
     df_style = read_xml(
@@ -1355,7 +1355,7 @@ def test_stylesheet_file_close(datapath, mode):
     # consider using --check-untyped-defs
     xsl_obj: BytesIO | StringIO  # type: ignore[annotation-unchecked]
 
-    with open(xsl, mode,encoding="utf-8") as f:
+    with open(xsl, mode, encoding="utf-8") as f:
         if mode == "rb":
             xsl_obj = BytesIO(f.read())
         else:
@@ -1407,7 +1407,7 @@ def test_string_error(parser):
 def test_file_like_iterparse(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
 
-    with open(filename, mode,encoding="utf-8") as f:
+    with open(filename, mode, encoding="utf-8") as f:
         if mode == "r" and parser == "lxml":
             with pytest.raises(
                 TypeError, match=("reading file objects must return bytes objects")
@@ -1444,7 +1444,7 @@ def test_file_io_iterparse(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
 
     funcIO = StringIO if mode == "r" else BytesIO
-    with open(filename, mode,encoding="utf-8") as f:
+    with open(filename, mode, encoding="utf-8") as f:
         with funcIO(f.read()) as b:
             if mode == "r" and parser == "lxml":
                 with pytest.raises(
@@ -1550,7 +1550,7 @@ def test_bad_xml(parser):
   </row>
 """
     with tm.ensure_clean(filename="bad.xml") as path:
-        with open(path, "w",encoding="utf-8") as f:
+        with open(path, "w", encoding="utf-8") as f:
             f.write(bad_xml)
 
         with pytest.raises(
diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py
index ba563d17f277f..d62b9fa27e264 100644
--- a/pandas/tests/io/xml/test_xml_dtypes.py
+++ b/pandas/tests/io/xml/test_xml_dtypes.py
@@ -29,7 +29,7 @@ def iterparse(request):
 
 def read_xml_iterparse(data, **kwargs):
     with tm.ensure_clean() as path:
-        with open(path, "w",encoding="utf-8") as f:
+        with open(path, "w", encoding="utf-8") as f:
             f.write(data)
         return read_xml(path, **kwargs)
 
diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py
index d5384de4079a7..070ab872a4e5b 100644
--- a/pandas/tests/series/methods/test_to_csv.py
+++ b/pandas/tests/series/methods/test_to_csv.py
@@ -52,7 +52,7 @@ def test_from_csv(self, datetime_series, string_series):
             series_h = self.read_csv(path, header=0)
             assert series_h.name == "series"
 
-            with open(path, "w",encoding="utf-8") as outfile:
+            with open(path, "w", encoding="utf-8") as outfile:
                 outfile.write("1998-01-01|1.0\n1999-01-01|2.0")
 
             series = self.read_csv(path, sep="|", parse_dates=True)
@@ -69,7 +69,7 @@ def test_to_csv(self, datetime_series):
         with tm.ensure_clean() as path:
             datetime_series.to_csv(path, header=False)
 
-            with open(path, newline=None,encoding="utf-8") as f:
+            with open(path, newline=None, encoding="utf-8") as f:
                 lines = f.readlines()
             assert lines[1] != "\n"
 
diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py
index 856aa0062e43d..72c9db23b2108 100644
--- a/pandas/tests/util/test_show_versions.py
+++ b/pandas/tests/util/test_show_versions.py
@@ -16,7 +16,7 @@ def test_show_versions(tmpdir):
 
     pd.show_versions(as_json=as_json)
 
-    with open(as_json,encoding="utf-8") as fd:
+    with open(as_json, encoding="utf-8") as fd:
         # check if file output is valid JSON, will raise an exception if not
         result = json.load(fd)
 
@@ -75,7 +75,7 @@ def test_json_output_match(capsys, tmpdir):
 
     out_path = os.path.join(tmpdir, "test_json.json")
     pd.show_versions(as_json=out_path)
-    with open(out_path,encoding="utf-8") as out_fd:
+    with open(out_path, encoding="utf-8") as out_fd:
         result_file = out_fd.read()
 
     assert result_console == result_file
diff --git a/scripts/generate_pxi.py b/scripts/generate_pxi.py
index 586b2d4fe3e35..47648a3937b4c 100644
--- a/scripts/generate_pxi.py
+++ b/scripts/generate_pxi.py
@@ -5,11 +5,11 @@
 
 
 def process_tempita(pxifile, outfile):
-    with open(pxifile,encoding="utf-8") as f:
+    with open(pxifile, encoding="utf-8") as f:
         tmpl = f.read()
     pyxcontent = Tempita.sub(tmpl)
 
-    with open(outfile, "w",encoding="utf-8") as f:
+    with open(outfile, "w", encoding="utf-8") as f:
         f.write(pyxcontent)
 
 
diff --git a/scripts/generate_version.py b/scripts/generate_version.py
index 3b778567e2335..8a93e4c1df55e 100644
--- a/scripts/generate_version.py
+++ b/scripts/generate_version.py
@@ -8,7 +8,7 @@ def write_version_info(path):
     if os.environ.get("MESON_DIST_ROOT"):
         # raise ValueError("dist root is", os.environ.get("MESON_DIST_ROOT"))
         path = os.path.join(os.environ.get("MESON_DIST_ROOT"), path)
-    with open(path, "w",encoding="utf-8") as file:
+    with open(path, "w", encoding="utf-8") as file:
         file.write(f'__version__="{versioneer.get_version()}"\n')
         file.write(
             f'__git_version__="{versioneer.get_versions()["full-revisionid"]}"\n'
diff --git a/scripts/pandas_errors_documented.py b/scripts/pandas_errors_documented.py
index f23d9bc979334..116a63b33eaf0 100644
--- a/scripts/pandas_errors_documented.py
+++ b/scripts/pandas_errors_documented.py
@@ -34,7 +34,7 @@ def main(argv: Sequence[str] | None = None) -> None:
     args = parser.parse_args(argv)
     with open(args.path, encoding="utf-8") as f:
         file_errors = get_defined_errors(f.read())
-    with open(API_PATH,encoding="utf-8") as f:
+    with open(API_PATH, encoding="utf-8") as f:
         doc_errors = {
             line.split(".")[1].strip() for line in f.readlines() if "errors" in line
         }
diff --git a/scripts/sort_whatsnew_note.py b/scripts/sort_whatsnew_note.py
index b7a427329a2df..531ea57244b23 100644
--- a/scripts/sort_whatsnew_note.py
+++ b/scripts/sort_whatsnew_note.py
@@ -63,12 +63,12 @@ def main(argv: Sequence[str] | None = None) -> int:
     args = parser.parse_args(argv)
     ret = 0
     for path in args.paths:
-        with open(path,encoding="utf-8") as fd:
+        with open(path, encoding="utf-8") as fd:
             content = fd.read()
         new_content = sort_whatsnew_note(content)
         if content != new_content:
             ret |= 1
-            with open(path, "w",encoding="utf-8") as fd:
+            with open(path, "w", encoding="utf-8") as fd:
                 fd.write(new_content)
     return ret
 
diff --git a/scripts/tests/test_validate_min_versions_in_sync.py b/scripts/tests/test_validate_min_versions_in_sync.py
index d454bf7063c3c..ac33f8dcbffaf 100644
--- a/scripts/tests/test_validate_min_versions_in_sync.py
+++ b/scripts/tests/test_validate_min_versions_in_sync.py
@@ -49,13 +49,13 @@
 def test_pin_min_versions_to_yaml_file(src_toml, src_yaml, expected_yaml):
     with open(src_toml, "rb") as toml_f:
         toml_map = tomllib.load(toml_f)
-    with open(src_yaml,encoding="utf-8") as yaml_f:
+    with open(src_yaml, encoding="utf-8") as yaml_f:
         yaml_file_data = yaml_f.read()
     yaml_file = yaml.safe_load(yaml_file_data)
     yaml_dependencies = yaml_file["dependencies"]
     yaml_map = get_yaml_map_from(yaml_dependencies)
     toml_map = get_toml_map_from(toml_map)
     result_yaml_file = pin_min_versions_to_yaml_file(yaml_map, toml_map, yaml_file_data)
-    with open(expected_yaml,encoding="utf-8") as yaml_f:
+    with open(expected_yaml, encoding="utf-8") as yaml_f:
         dummy_yaml_expected_file_1 = yaml_f.read()
     assert result_yaml_file == dummy_yaml_expected_file_1
diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py
index 6b43506bd03f1..c9eb476ab65fa 100755
--- a/scripts/validate_docstrings.py
+++ b/scripts/validate_docstrings.py
@@ -367,7 +367,7 @@ def get_all_api_items():
     base_path = pathlib.Path(__file__).parent.parent
     api_doc_fnames = pathlib.Path(base_path, "doc", "source", "reference")
     for api_doc_fname in api_doc_fnames.glob("*.rst"):
-        with open(api_doc_fname,encoding="utf-8") as f:
+        with open(api_doc_fname, encoding="utf-8") as f:
             yield from get_api_items(f)
 
 
diff --git a/scripts/validate_exception_location.py b/scripts/validate_exception_location.py
index 82154e82786b9..5f77e4c78db82 100644
--- a/scripts/validate_exception_location.py
+++ b/scripts/validate_exception_location.py
@@ -36,7 +36,7 @@
 
 
 def get_warnings_and_exceptions_from_api_path() -> set[str]:
-    with open(API_PATH,encoding="utf-8") as f:
+    with open(API_PATH, encoding="utf-8") as f:
         doc_errors = {
             line.split(".")[1].strip() for line in f.readlines() if "errors" in line
         }
diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py
index f4d51142876b9..cb03276d2dd93 100755
--- a/scripts/validate_min_versions_in_sync.py
+++ b/scripts/validate_min_versions_in_sync.py
@@ -63,7 +63,7 @@ def pin_min_versions_to_ci_deps() -> int:
         toml_dependencies = tomllib.load(toml_f)
     ret = 0
     for curr_file in all_yaml_files:
-        with open(curr_file,encoding="utf-8") as yaml_f:
+        with open(curr_file, encoding="utf-8") as yaml_f:
             yaml_start_data = yaml_f.read()
         yaml_file = yaml.safe_load(yaml_start_data)
         yaml_dependencies = yaml_file["dependencies"]
@@ -73,7 +73,7 @@ def pin_min_versions_to_ci_deps() -> int:
             yaml_map, toml_map, yaml_start_data
         )
         if yaml_result_data != yaml_start_data:
-            with open(curr_file, "w",encoding="utf-8") as f:
+            with open(curr_file, "w", encoding="utf-8") as f:
                 f.write(yaml_result_data)
             ret |= 1
     return ret
diff --git a/setup.py b/setup.py
index 8fcd4804cf6e6..52739a97bec2a 100755
--- a/setup.py
+++ b/setup.py
@@ -88,11 +88,11 @@ def render_templates(cls, pxifiles):
                 # if .pxi.in is not updated, no need to output .pxi
                 continue
 
-            with open(pxifile,encoding="utf-8") as f:
+            with open(pxifile, encoding="utf-8") as f:
                 tmpl = f.read()
             pyxcontent = Tempita.sub(tmpl)
 
-            with open(outfile, "w",encoding="utf-8") as f:
+            with open(outfile, "w", encoding="utf-8") as f:
                 f.write(pyxcontent)
 
     def build_extensions(self):
diff --git a/web/pandas_web.py b/web/pandas_web.py
index 75dde11c25138..9191cde31c20f 100755
--- a/web/pandas_web.py
+++ b/web/pandas_web.py
@@ -110,7 +110,7 @@ def blog_add_posts(context):
                 md = markdown.Markdown(
                     extensions=context["main"]["markdown_extensions"]
                 )
-                with open(os.path.join(posts_path, fname),encoding="utf-8") as f:
+                with open(os.path.join(posts_path, fname), encoding="utf-8") as f:
                     html = md.convert(f.read())
                 title = md.Meta["title"][0]
                 summary = re.sub(tag_expr, "", html)
@@ -197,7 +197,11 @@ def maintainers_add_info(context):
 
         # save the data fetched from github to use it in case we exceed
         # git github api quota in the future
-        with open(pathlib.Path(context["target_path"]) / "maintainers.json", "w",encoding="utf-8") as f:
+        with open(
+            pathlib.Path(context["target_path"]) / "maintainers.json",
+            "w",
+            encoding="utf-8",
+        ) as f:
             json.dump(maintainers_info, f)
 
         return context
@@ -220,7 +224,11 @@ def home_add_releases(context):
             resp.raise_for_status()
             releases = resp.json()
 
-        with open(pathlib.Path(context["target_path"]) / "releases.json", "w",encoding="utf-8") as f:
+        with open(
+            pathlib.Path(context["target_path"]) / "releases.json",
+            "w",
+            encoding="utf-8",
+        ) as f:
             json.dump(releases, f, default=datetime.datetime.isoformat)
 
         for release in releases:
@@ -304,7 +312,9 @@ def roadmap_pdeps(context):
             resp.raise_for_status()
             pdeps = resp.json()
 
-        with open(pathlib.Path(context["target_path"]) / "pdeps.json", "w",encoding="utf-8") as f:
+        with open(
+            pathlib.Path(context["target_path"]) / "pdeps.json", "w", encoding="utf-8"
+        ) as f:
             json.dump(pdeps, f)
 
         for pdep in sorted(pdeps["items"], key=operator.itemgetter("title")):
@@ -346,7 +356,7 @@ def get_context(config_fname: str, **kwargs):
     Load the config yaml as the base context, and enrich it with the
     information added by the context preprocessors defined in the file.
     """
-    with open(config_fname,encoding="utf-8") as f:
+    with open(config_fname, encoding="utf-8") as f:
         context = yaml.safe_load(f)
 
     context["source_path"] = os.path.dirname(config_fname)
@@ -418,7 +428,7 @@ def main(
 
         extension = os.path.splitext(fname)[-1]
         if extension in (".html", ".md"):
-            with open(os.path.join(source_path, fname),encoding="utf-8") as f:
+            with open(os.path.join(source_path, fname), encoding="utf-8") as f:
                 content = f.read()
             if extension == ".md":
                 body = markdown.markdown(
@@ -431,7 +441,9 @@ def main(
             context["base_url"] = "".join(["../"] * os.path.normpath(fname).count("/"))
             content = jinja_env.from_string(content).render(**context)
             fname_html = os.path.splitext(fname)[0] + ".html"
-            with open(os.path.join(target_path, fname_html), "w",encoding="utf-8") as f:
+            with open(
+                os.path.join(target_path, fname_html), "w", encoding="utf-8"
+            ) as f:
                 f.write(content)
         else:
             shutil.copy(

From 8890943c683467d4a875cdf4b1e1118a90a03301 Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sat, 29 Apr 2023 19:17:42 +0530
Subject: [PATCH 07/17] Some Changed reflected

---
 pandas/_testing/contexts.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py
index f11041d477701..f09c59e09fbb2 100644
--- a/pandas/_testing/contexts.py
+++ b/pandas/_testing/contexts.py
@@ -126,7 +126,11 @@ def ensure_clean(
     handle_or_str: str | IO = str(path)
     if return_filelike:
         kwargs.setdefault("mode", "w+b")
-        handle_or_str = open(path, **kwargs, encoding="utf-8")
+        handle_or_str = open(
+            path,
+            encoding=kwargs.get("encoding", None),
+            **{key: value for key, value in kwargs.items() if key != "encoding"},
+        )
 
     try:
         yield handle_or_str

From dcbe4dcc9a81afc2594c675e9e7da339899b6054 Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sun, 30 Apr 2023 15:51:14 +0530
Subject: [PATCH 08/17] test_xml Updated

---
 pandas/tests/io/xml/test_xml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 08caa3307e9cf..bf8da72adb732 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -1444,7 +1444,7 @@ def test_file_io_iterparse(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
 
     funcIO = StringIO if mode == "r" else BytesIO
-    with open(filename, mode, encoding="utf-8") as f:
+    with open(filename, mode, encoding="utf-8" if mode=="r" else None) as f:
         with funcIO(f.read()) as b:
             if mode == "r" and parser == "lxml":
                 with pytest.raises(

From b4d107b44b972d039188b1a5a738753c433a3eeb Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sun, 30 Apr 2023 16:03:08 +0530
Subject: [PATCH 09/17] Pre-commit check passed

---
 pandas/tests/io/xml/test_xml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index bf8da72adb732..df369546a4a45 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -1444,7 +1444,7 @@ def test_file_io_iterparse(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
 
     funcIO = StringIO if mode == "r" else BytesIO
-    with open(filename, mode, encoding="utf-8" if mode=="r" else None) as f:
+    with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
         with funcIO(f.read()) as b:
             if mode == "r" and parser == "lxml":
                 with pytest.raises(

From 959dadcd34f7aecb73338361e69159b78287d1db Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sun, 30 Apr 2023 18:01:22 +0530
Subject: [PATCH 10/17] Mode changed in xml file

---
 pandas/tests/io/xml/test_xml.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index df369546a4a45..c28f8fd788787 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -1444,7 +1444,11 @@ def test_file_io_iterparse(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
 
     funcIO = StringIO if mode == "r" else BytesIO
-    with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
+    with open(
+        filename,
+        mode="utf-8" if mode == "r" else None,
+        encoding="utf-8" if mode == "r" else None,
+    ) as f:
         with funcIO(f.read()) as b:
             if mode == "r" and parser == "lxml":
                 with pytest.raises(

From 7aebe0519d6d919e52c2147ea85eb73458c54f87 Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sun, 30 Apr 2023 18:36:45 +0530
Subject: [PATCH 11/17] mode reverted

---
 pandas/tests/io/xml/test_xml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index c28f8fd788787..5d101fc2ceb59 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -1446,7 +1446,7 @@ def test_file_io_iterparse(datapath, parser, mode):
     funcIO = StringIO if mode == "r" else BytesIO
     with open(
         filename,
-        mode="utf-8" if mode == "r" else None,
+        mode,
         encoding="utf-8" if mode == "r" else None,
     ) as f:
         with funcIO(f.read()) as b:

From c5fb9d6db85595fabf973e8fc4a77e6ae7f573d2 Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Fri, 5 May 2023 19:03:19 +0530
Subject: [PATCH 12/17] Try to fix errors

---
 .../io/parser/common/test_file_buffer_url.py  | 11 ++++++---
 pandas/tests/io/xml/test_to_xml.py            | 24 ++++++++++++-------
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py
index 1f3ee18541f4d..7884e67044e9e 100644
--- a/pandas/tests/io/parser/common/test_file_buffer_url.py
+++ b/pandas/tests/io/parser/common/test_file_buffer_url.py
@@ -285,9 +285,14 @@ def test_file_handles_with_open(all_parsers, csv1):
     parser = all_parsers
 
     for mode in ["r", "rb"]:
-        with open(csv1, mode, encoding="utf-8") as f:
-            parser.read_csv(f)
-            assert not f.closed
+        if mode == "rb":
+            with open(csv1, mode) as f:
+                parser.read_csv(f)
+                assert not f.closed
+        else:
+            with open(csv1, mode, encoding="utf-8") as f:
+                parser.read_csv(f)
+                assert not f.closed
 
 
 def test_invalid_file_buffer_class(all_parsers):
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index cb8ef8d2833d1..b080da206ce24 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -983,9 +983,12 @@ def test_unknown_parser():
 def test_stylesheet_file_like(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
-    with open(xsl, mode, encoding="utf-8") as f:
-        assert geom_df.to_xml(stylesheet=f) == xsl_expected
-
+    if mode == "rb":
+        with open(xsl, mode) as f:
+            assert geom_df.to_xml(stylesheet=f) == xsl_expected
+    else:
+        with open(xsl, mode, encoding="utf-8") as f:
+            assert geom_df.to_xml(stylesheet=f) == xsl_expected
 
 @td.skip_if_no("lxml")
 def test_stylesheet_io(datapath, mode):
@@ -995,10 +998,11 @@ def test_stylesheet_io(datapath, mode):
     # consider using --check-untyped-defs
     xsl_obj: BytesIO | StringIO  # type: ignore[annotation-unchecked]
 
-    with open(xsl_path, mode, encoding="utf-8") as f:
-        if mode == "rb":
+    if mode == "rb":
+        with open(xsl_path, "rb") as f:
             xsl_obj = BytesIO(f.read())
-        else:
+    else:
+        with open(xsl_path, "r", encoding="utf-8") as f:
             xsl_obj = StringIO(f.read())
 
     output = geom_df.to_xml(stylesheet=xsl_obj)
@@ -1010,8 +1014,12 @@ def test_stylesheet_io(datapath, mode):
 def test_stylesheet_buffered_reader(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
-    with open(xsl, mode, encoding="utf-8") as f:
-        xsl_obj = f.read()
+    if mode == "rb":
+        with open(xsl, mode) as f:
+            xsl_obj = f.read()
+    else:
+        with open(xsl, mode, encoding="utf-8") as f:
+            xsl_obj = f.read()
 
     output = geom_df.to_xml(stylesheet=xsl_obj)
 

From d250a1d148f326be2df17f0e4e19e29a7b4b82b6 Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sat, 6 May 2023 00:47:24 +0530
Subject: [PATCH 13/17] error-checks

---
 pandas/tests/io/parser/common/test_file_buffer_url.py | 2 +-
 pandas/tests/io/xml/test_to_xml.py                    | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py
index 7884e67044e9e..3bbd9c422dac8 100644
--- a/pandas/tests/io/parser/common/test_file_buffer_url.py
+++ b/pandas/tests/io/parser/common/test_file_buffer_url.py
@@ -286,7 +286,7 @@ def test_file_handles_with_open(all_parsers, csv1):
 
     for mode in ["r", "rb"]:
         if mode == "rb":
-            with open(csv1, mode) as f:
+            with open(csv1, mode, encoding="utf-8") as f:
                 parser.read_csv(f)
                 assert not f.closed
         else:
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index b080da206ce24..59534bd421199 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -984,12 +984,13 @@ def test_stylesheet_file_like(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
     if mode == "rb":
-        with open(xsl, mode) as f:
+        with open(xsl, mode, encoding="utf-8") as f:
             assert geom_df.to_xml(stylesheet=f) == xsl_expected
     else:
         with open(xsl, mode, encoding="utf-8") as f:
             assert geom_df.to_xml(stylesheet=f) == xsl_expected
 
+
 @td.skip_if_no("lxml")
 def test_stylesheet_io(datapath, mode):
     xsl_path = datapath("io", "data", "xml", "row_field_output.xsl")
@@ -1002,7 +1003,7 @@ def test_stylesheet_io(datapath, mode):
         with open(xsl_path, "rb") as f:
             xsl_obj = BytesIO(f.read())
     else:
-        with open(xsl_path, "r", encoding="utf-8") as f:
+        with open(xsl_path, encoding="utf-8") as f:
             xsl_obj = StringIO(f.read())
 
     output = geom_df.to_xml(stylesheet=xsl_obj)
@@ -1015,7 +1016,7 @@ def test_stylesheet_buffered_reader(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
     if mode == "rb":
-        with open(xsl, mode) as f:
+        with open(xsl, mode, encoding="utf-8") as f:
             xsl_obj = f.read()
     else:
         with open(xsl, mode, encoding="utf-8") as f:

From 640e8e0c41a97200db88e35c7b09df5909a43309 Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sat, 6 May 2023 20:41:54 +0530
Subject: [PATCH 14/17] Fix Some errors

---
 .../io/parser/common/test_file_buffer_url.py   |  2 +-
 pandas/tests/io/xml/test_to_xml.py             |  4 ++--
 pandas/tests/io/xml/test_xml.py                | 18 +++++++++---------
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py
index 3bbd9c422dac8..7884e67044e9e 100644
--- a/pandas/tests/io/parser/common/test_file_buffer_url.py
+++ b/pandas/tests/io/parser/common/test_file_buffer_url.py
@@ -286,7 +286,7 @@ def test_file_handles_with_open(all_parsers, csv1):
 
     for mode in ["r", "rb"]:
         if mode == "rb":
-            with open(csv1, mode, encoding="utf-8") as f:
+            with open(csv1, mode) as f:
                 parser.read_csv(f)
                 assert not f.closed
         else:
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index 59534bd421199..1e6339e5c75d3 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -984,7 +984,7 @@ def test_stylesheet_file_like(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
     if mode == "rb":
-        with open(xsl, mode, encoding="utf-8") as f:
+        with open(xsl, mode) as f:
             assert geom_df.to_xml(stylesheet=f) == xsl_expected
     else:
         with open(xsl, mode, encoding="utf-8") as f:
@@ -1016,7 +1016,7 @@ def test_stylesheet_buffered_reader(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
     if mode == "rb":
-        with open(xsl, mode, encoding="utf-8") as f:
+        with open(xsl, mode) as f:
             xsl_obj = f.read()
     else:
         with open(xsl, mode, encoding="utf-8") as f:
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 28459ee86712d..49b8f8956de5b 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -351,7 +351,7 @@ def test_parser_consistency_url(parser):
 
 def test_file_like(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode, encoding="utf-8") as f:
+    with open(filename, mode) as f:
         df_file = read_xml(f, parser=parser)
 
     df_expected = DataFrame(
@@ -369,7 +369,7 @@ def test_file_like(datapath, parser, mode):
 
 def test_file_io(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode, encoding="utf-8") as f:
+    with open(filename, mode) as f:
         xml_obj = f.read()
 
     df_io = read_xml(
@@ -392,7 +392,7 @@ def test_file_io(datapath, parser, mode):
 
 def test_file_buffered_reader_string(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode, encoding="utf-8") as f:
+    with open(filename, mode) as f:
         xml_obj = f.read()
 
     df_str = read_xml(xml_obj, parser=parser)
@@ -412,7 +412,7 @@ def test_file_buffered_reader_string(datapath, parser, mode):
 
 def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode, encoding="utf-8") as f:
+    with open(filename, mode) as f:
         next(f)
         xml_obj = f.read()
 
@@ -1163,7 +1163,7 @@ def test_stylesheet_file_like(datapath, mode):
     kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
     xsl = datapath("io", "data", "xml", "flatten_doc.xsl")
 
-    with open(xsl, mode, encoding="utf-8") as f:
+    with open(xsl, mode) as f:
         df_style = read_xml(
             kml,
             xpath=".//k:Placemark",
@@ -1183,7 +1183,7 @@ def test_stylesheet_io(datapath, mode):
     # consider using --check-untyped-defs
     xsl_obj: BytesIO | StringIO  # type: ignore[annotation-unchecked]
 
-    with open(xsl, mode, encoding="utf-8") as f:
+    with open(xsl, mode) as f:
         if mode == "rb":
             xsl_obj = BytesIO(f.read())
         else:
@@ -1204,7 +1204,7 @@ def test_stylesheet_buffered_reader(datapath, mode):
     kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
     xsl = datapath("io", "data", "xml", "flatten_doc.xsl")
 
-    with open(xsl, mode, encoding="utf-8") as f:
+    with open(xsl, mode) as f:
         xsl_obj = f.read()
 
     df_style = read_xml(
@@ -1364,7 +1364,7 @@ def test_stylesheet_file_close(datapath, mode):
     # consider using --check-untyped-defs
     xsl_obj: BytesIO | StringIO  # type: ignore[annotation-unchecked]
 
-    with open(xsl, mode, encoding="utf-8") as f:
+    with open(xsl, mode) as f:
         if mode == "rb":
             xsl_obj = BytesIO(f.read())
         else:
@@ -1416,7 +1416,7 @@ def test_string_error(parser):
 def test_file_like_iterparse(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
 
-    with open(filename, mode, encoding="utf-8") as f:
+    with open(filename, mode) as f:
         if mode == "r" and parser == "lxml":
             with pytest.raises(
                 TypeError, match=("reading file objects must return bytes objects")

From 98c004c3d34c8677f7b53f6b480e9f7c20ddc9b9 Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sat, 6 May 2023 21:47:15 +0530
Subject: [PATCH 15/17] Unspecified-encoding fixed

---
 .../io/parser/common/test_file_buffer_url.py   |  2 +-
 pandas/tests/io/xml/test_to_xml.py             |  4 ++--
 pandas/tests/io/xml/test_xml.py                | 18 +++++++++---------
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py
index 7884e67044e9e..aaba42018a439 100644
--- a/pandas/tests/io/parser/common/test_file_buffer_url.py
+++ b/pandas/tests/io/parser/common/test_file_buffer_url.py
@@ -286,7 +286,7 @@ def test_file_handles_with_open(all_parsers, csv1):
 
     for mode in ["r", "rb"]:
         if mode == "rb":
-            with open(csv1, mode) as f:
+            with open(csv1, mode, encoding="utf-8" if mode == "r" else None) as f:
                 parser.read_csv(f)
                 assert not f.closed
         else:
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index 1e6339e5c75d3..057734595311e 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -984,7 +984,7 @@ def test_stylesheet_file_like(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
     if mode == "rb":
-        with open(xsl, mode) as f:
+        with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
             assert geom_df.to_xml(stylesheet=f) == xsl_expected
     else:
         with open(xsl, mode, encoding="utf-8") as f:
@@ -1016,7 +1016,7 @@ def test_stylesheet_buffered_reader(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
     if mode == "rb":
-        with open(xsl, mode) as f:
+        with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
             xsl_obj = f.read()
     else:
         with open(xsl, mode, encoding="utf-8") as f:
diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py
index 49b8f8956de5b..04abebe4a0a71 100644
--- a/pandas/tests/io/xml/test_xml.py
+++ b/pandas/tests/io/xml/test_xml.py
@@ -351,7 +351,7 @@ def test_parser_consistency_url(parser):
 
 def test_file_like(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode) as f:
+    with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
         df_file = read_xml(f, parser=parser)
 
     df_expected = DataFrame(
@@ -369,7 +369,7 @@ def test_file_like(datapath, parser, mode):
 
 def test_file_io(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode) as f:
+    with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
         xml_obj = f.read()
 
     df_io = read_xml(
@@ -392,7 +392,7 @@ def test_file_io(datapath, parser, mode):
 
 def test_file_buffered_reader_string(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode) as f:
+    with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
         xml_obj = f.read()
 
     df_str = read_xml(xml_obj, parser=parser)
@@ -412,7 +412,7 @@ def test_file_buffered_reader_string(datapath, parser, mode):
 
 def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
-    with open(filename, mode) as f:
+    with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
         next(f)
         xml_obj = f.read()
 
@@ -1163,7 +1163,7 @@ def test_stylesheet_file_like(datapath, mode):
     kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
     xsl = datapath("io", "data", "xml", "flatten_doc.xsl")
 
-    with open(xsl, mode) as f:
+    with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
         df_style = read_xml(
             kml,
             xpath=".//k:Placemark",
@@ -1183,7 +1183,7 @@ def test_stylesheet_io(datapath, mode):
     # consider using --check-untyped-defs
     xsl_obj: BytesIO | StringIO  # type: ignore[annotation-unchecked]
 
-    with open(xsl, mode) as f:
+    with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
         if mode == "rb":
             xsl_obj = BytesIO(f.read())
         else:
@@ -1204,7 +1204,7 @@ def test_stylesheet_buffered_reader(datapath, mode):
     kml = datapath("io", "data", "xml", "cta_rail_lines.kml")
     xsl = datapath("io", "data", "xml", "flatten_doc.xsl")
 
-    with open(xsl, mode) as f:
+    with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
         xsl_obj = f.read()
 
     df_style = read_xml(
@@ -1364,7 +1364,7 @@ def test_stylesheet_file_close(datapath, mode):
     # consider using --check-untyped-defs
     xsl_obj: BytesIO | StringIO  # type: ignore[annotation-unchecked]
 
-    with open(xsl, mode) as f:
+    with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
         if mode == "rb":
             xsl_obj = BytesIO(f.read())
         else:
@@ -1416,7 +1416,7 @@ def test_string_error(parser):
 def test_file_like_iterparse(datapath, parser, mode):
     filename = datapath("io", "data", "xml", "books.xml")
 
-    with open(filename, mode) as f:
+    with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f:
         if mode == "r" and parser == "lxml":
             with pytest.raises(
                 TypeError, match=("reading file objects must return bytes objects")

From 513aebd26b35f7e96100af0e2ddd429deebfb3d9 Mon Sep 17 00:00:00 2001
From: Ketu Patel <ketup806@gmail.com>
Date: Sun, 7 May 2023 10:22:22 +0530
Subject: [PATCH 16/17] final committed

---
 .../io/parser/common/test_file_buffer_url.py  | 11 +++------
 pandas/tests/io/xml/test_to_xml.py            | 23 ++++++-------------
 2 files changed, 10 insertions(+), 24 deletions(-)

diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py
index aaba42018a439..ba196a532adf6 100644
--- a/pandas/tests/io/parser/common/test_file_buffer_url.py
+++ b/pandas/tests/io/parser/common/test_file_buffer_url.py
@@ -285,14 +285,9 @@ def test_file_handles_with_open(all_parsers, csv1):
     parser = all_parsers
 
     for mode in ["r", "rb"]:
-        if mode == "rb":
-            with open(csv1, mode, encoding="utf-8" if mode == "r" else None) as f:
-                parser.read_csv(f)
-                assert not f.closed
-        else:
-            with open(csv1, mode, encoding="utf-8") as f:
-                parser.read_csv(f)
-                assert not f.closed
+        with open(csv1, mode, encoding="utf-8" if mode == "r" else None) as f:
+            parser.read_csv(f)
+            assert not f.closed
 
 
 def test_invalid_file_buffer_class(all_parsers):
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index 057734595311e..1f1f44f408fc1 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -983,12 +983,8 @@ def test_unknown_parser():
 def test_stylesheet_file_like(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
-    if mode == "rb":
-        with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
-            assert geom_df.to_xml(stylesheet=f) == xsl_expected
-    else:
-        with open(xsl, mode, encoding="utf-8") as f:
-            assert geom_df.to_xml(stylesheet=f) == xsl_expected
+    with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
+        assert geom_df.to_xml(stylesheet=f) == xsl_expected
 
 
 @td.skip_if_no("lxml")
@@ -999,11 +995,10 @@ def test_stylesheet_io(datapath, mode):
     # consider using --check-untyped-defs
     xsl_obj: BytesIO | StringIO  # type: ignore[annotation-unchecked]
 
-    if mode == "rb":
-        with open(xsl_path, "rb") as f:
+    with open(xsl_path, mode, encoding="utf-8" if mode == "r" else None) as f:
+        if mode == "rb":
             xsl_obj = BytesIO(f.read())
-    else:
-        with open(xsl_path, encoding="utf-8") as f:
+        else:
             xsl_obj = StringIO(f.read())
 
     output = geom_df.to_xml(stylesheet=xsl_obj)
@@ -1015,12 +1010,8 @@ def test_stylesheet_io(datapath, mode):
 def test_stylesheet_buffered_reader(datapath, mode):
     xsl = datapath("io", "data", "xml", "row_field_output.xsl")
 
-    if mode == "rb":
-        with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
-            xsl_obj = f.read()
-    else:
-        with open(xsl, mode, encoding="utf-8") as f:
-            xsl_obj = f.read()
+    with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f:
+        xsl_obj = f.read()
 
     output = geom_df.to_xml(stylesheet=xsl_obj)
 

From 91287692679cd80dde5bd73669ce02ed8e47d703 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Sun, 7 May 2023 09:04:00 +0100
Subject: [PATCH 17/17] simplify

---
 pandas/_testing/contexts.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py
index f09c59e09fbb2..ba2c8c219dc41 100644
--- a/pandas/_testing/contexts.py
+++ b/pandas/_testing/contexts.py
@@ -124,13 +124,10 @@ def ensure_clean(
     path.touch()
 
     handle_or_str: str | IO = str(path)
+    encoding = kwargs.pop("encoding", None)
     if return_filelike:
         kwargs.setdefault("mode", "w+b")
-        handle_or_str = open(
-            path,
-            encoding=kwargs.get("encoding", None),
-            **{key: value for key, value in kwargs.items() if key != "encoding"},
-        )
+        handle_or_str = open(path, encoding=encoding, **kwargs)
 
     try:
         yield handle_or_str