From 5b482fed8b04b6296d39893dab66e4fe65529978 Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Fri, 28 Apr 2023 10:42:20 +0530 Subject: [PATCH 01/17] Changes Confirmed --- .pre-commit-config.yaml => b/.pre-commit-config.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) rename .pre-commit-config.yaml => b/.pre-commit-config.yaml (98%) diff --git a/.pre-commit-config.yaml b/b/.pre-commit-config.yaml similarity index 98% rename from .pre-commit-config.yaml rename to b/.pre-commit-config.yaml index 43b3699907325..fae32b7b01485 100644 --- a/.pre-commit-config.yaml +++ b/b/.pre-commit-config.yaml @@ -83,9 +83,6 @@ repos: hooks: - id: pylint stages: [manual] -- repo: https://github.com/pycqa/pylint - rev: v2.16.2 - hooks: - id: pylint alias: redefined-outer-name name: Redefining name from outer scope @@ -99,6 +96,11 @@ repos: |^pandas/conftest\.py # keep excluded args: [--disable=all, --enable=redefined-outer-name] stages: [manual] + - id: pylint + alias: unspecified-encoding + name: Using open without explicitly specifying an encoding + args: [--disable=all, --enable=unspecified-encoding] + stages: [manual] - repo: https://github.com/PyCQA/isort rev: 5.12.0 hooks: From c20ac6d72093019999c42c56eb1650adc90f750e Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sat, 29 Apr 2023 14:30:48 +0530 Subject: [PATCH 02/17] Encoding Completed --- .pre-commit-config.yaml | 432 +++++++++++++++++++ scripts/check_test_naming.py | 2 +- scripts/validate_rst_title_capitalization.py | 2 +- 3 files changed, 434 insertions(+), 2 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000000..fae32b7b01485 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,432 @@ +minimum_pre_commit_version: 2.15.0 +exclude: ^LICENSES/|\.(html|csv|svg)$ +# reserve "manual" for relatively slow hooks which we still want to run in CI +default_stages: [ + commit, + merge-commit, + push, + prepare-commit-msg, + commit-msg, + post-checkout, + post-commit, + post-merge, + post-rewrite +] +ci: + autofix_prs: false +repos: +- repo: local + hooks: + # NOTE: we make `black` a local hook because if it's installed from + # PyPI (rather than from source) then it'll run twice as fast thanks to mypyc + - id: black + name: black + description: "Black: The uncompromising Python code formatter" + entry: black + language: python + require_serial: true + types_or: [python, pyi] + additional_dependencies: [black==23.1.0] +- repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.259 + hooks: + - id: ruff + args: [--exit-non-zero-on-fix] +- repo: https://github.com/jendrikseipp/vulture + rev: 'v2.7' + hooks: + - id: vulture + entry: python scripts/run_vulture.py + pass_filenames: true + require_serial: false +- repo: https://github.com/codespell-project/codespell + rev: v2.2.2 + hooks: + - id: codespell + types_or: [python, rst, markdown, cython, c] + additional_dependencies: [tomli] +- repo: https://github.com/MarcoGorelli/cython-lint + rev: v0.12.5 + hooks: + - id: cython-lint + - id: double-quote-cython-strings +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: debug-statements + - id: end-of-file-fixer + exclude: \.txt$ + stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, + post-checkout, post-commit, post-merge, post-rewrite] + - id: trailing-whitespace + stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, + post-checkout, post-commit, post-merge, post-rewrite] +- repo: https://github.com/cpplint/cpplint + rev: 1.6.1 + hooks: + - id: cpplint + # We don't lint all C files because we don't want to lint any that are built + # from Cython files nor do we want to lint C files that we didn't modify for + # this particular codebase (e.g. src/headers, src/klib). However, + # we can lint all header files since they aren't "generated" like C files are. + exclude: ^pandas/_libs/src/(klib|headers)/ + args: [ + --quiet, + '--extensions=c,h', + '--headers=h', + --recursive, + --linelength=88, + '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' + ] +- repo: https://github.com/pycqa/pylint + rev: v2.16.2 + hooks: + - id: pylint + stages: [manual] + - id: pylint + alias: redefined-outer-name + name: Redefining name from outer scope + files: ^pandas/ + exclude: | + (?x) + ^pandas/tests # keep excluded + |/_testing/ # keep excluded + |^pandas/util/_test_decorators\.py # keep excluded + |^pandas/_version\.py # keep excluded + |^pandas/conftest\.py # keep excluded + args: [--disable=all, --enable=redefined-outer-name] + stages: [manual] + - id: pylint + alias: unspecified-encoding + name: Using open without explicitly specifying an encoding + args: [--disable=all, --enable=unspecified-encoding] + stages: [manual] +- repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort +- repo: https://github.com/asottile/pyupgrade + rev: v3.3.1 + hooks: + - id: pyupgrade + args: [--py38-plus] +- repo: https://github.com/pre-commit/pygrep-hooks + rev: v1.10.0 + hooks: + - id: rst-backticks + - id: rst-directive-colons + types: [text] # overwrite types: [rst] + types_or: [python, rst] + - id: rst-inline-touching-normal + types: [text] # overwrite types: [rst] + types_or: [python, rst] +- repo: https://github.com/sphinx-contrib/sphinx-lint + rev: v0.6.7 + hooks: + - id: sphinx-lint +- repo: local + hooks: + - id: pyright + # note: assumes python env is setup and activated + name: pyright + entry: pyright + language: node + pass_filenames: false + types: [python] + stages: [manual] + additional_dependencies: &pyright_dependencies + - pyright@1.1.292 + - id: pyright_reportGeneralTypeIssues + # note: assumes python env is setup and activated + name: pyright reportGeneralTypeIssues + entry: pyright --skipunannotated -p pyright_reportGeneralTypeIssues.json --level warning + language: node + pass_filenames: false + types: [python] + stages: [manual] + additional_dependencies: *pyright_dependencies + - id: mypy + # note: assumes python env is setup and activated + name: mypy + entry: mypy + language: system + pass_filenames: false + types: [python] + stages: [manual] + - id: stubtest + # note: assumes python env is setup and activated + # note: requires pandas dev to be installed + name: mypy (stubtest) + entry: python + language: system + pass_filenames: false + types: [pyi] + args: [scripts/run_stubtest.py] + stages: [manual] + - id: inconsistent-namespace-usage + name: 'Check for inconsistent use of pandas namespace' + entry: python scripts/check_for_inconsistent_pandas_namespace.py + exclude: ^pandas/core/interchange/ + language: python + types: [python] + - id: no-os-remove + name: Check code for instances of os.remove + entry: os\.remove + language: pygrep + types: [python] + files: ^pandas/tests/ + exclude: | + (?x)^ + pandas/tests/io/pytables/test_store\.py$ + - id: unwanted-patterns + name: Unwanted patterns + language: pygrep + entry: | + (?x) + # outdated annotation syntax, missing error codes + \#\ type:\ (?!ignore) + |\#\ type:\s?ignore(?!\[) + + # foo._class__ instead of type(foo) + |\.__class__ + + # np.bool/np.object instead of np.bool_/np.object_ + |np\.bool[^_8`] + |np\.object[^_8`] + + # imports from collections.abc instead of `from collections import abc` + |from\ collections\.abc\ import + + # Numpy + |from\ numpy\ import\ random + |from\ numpy\.random\ import + + # Incorrect code-block / IPython directives + |\.\.\ code-block\ :: + |\.\.\ ipython\ :: + # directive should not have a space before :: + |\.\.\ \w+\ :: + + # Check for deprecated messages without sphinx directive + |(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.) + + # {foo!r} instead of {repr(foo)} + |!r} + + # builtin filter function + |(?obj`, not ` obj` + language: pygrep + entry: '[a-zA-Z0-9*]> ' + files: (\.pyx|\.pxi.in)$ + - id: incorrect-backticks + name: Check for backticks incorrectly rendering because of missing spaces + language: pygrep + entry: '[a-zA-Z0-9]\`\`?[a-zA-Z0-9]' + types: [rst] + files: ^doc/source/ + - id: seed-check-asv + name: Check for unnecessary random seeds in asv benchmarks + language: pygrep + entry: 'np\.random\.seed' + files: ^asv_bench/benchmarks + exclude: ^asv_bench/benchmarks/pandas_vb_common\.py + - id: np-testing-array-equal + name: Check for usage of numpy testing or array_equal + language: pygrep + entry: '(numpy|np)(\.testing|\.array_equal)' + files: ^pandas/tests/ + types: [python] + - id: invalid-ea-testing + name: Check for invalid EA testing + language: pygrep + entry: 'tm\.assert_(series|frame)_equal' + files: ^pandas/tests/extension/base + types: [python] + exclude: ^pandas/tests/extension/base/base\.py + - id: unwanted-patterns-in-tests + name: Unwanted patterns in tests + language: pygrep + entry: | + (?x) + # pytest.xfail instead of pytest.mark.xfail + pytest\.xfail + + # imports from pandas._testing instead of `import pandas._testing as tm` + |from\ pandas\._testing\ import + |from\ pandas\ import\ _testing\ as\ tm + + # No direct imports from conftest + |conftest\ import + |import\ conftest + + # pandas.testing instead of tm + |pd\.testing\. + + # pd.api.types instead of from pandas.api.types import ... + |(pd|pandas)\.api\.types\. + + # np.testing, np.array_equal + |(numpy|np)(\.testing|\.array_equal) + + # unittest.mock (use pytest builtin monkeypatch fixture instead) + |(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch) + + # pytest raises without context + |\s\ pytest.raises + + # pytest.warns (use tm.assert_produces_warning instead) + |pytest\.warns + files: ^pandas/tests/ + types_or: [python, cython, rst] + - id: unwanted-patterns-in-ea-tests + name: Unwanted patterns in EA tests + language: pygrep + entry: | + (?x) + tm.assert_(series|frame)_equal + files: ^pandas/tests/extension/base/ + exclude: ^pandas/tests/extension/base/base\.py$ + types_or: [python, cython, rst] + - id: unwanted-patterns-in-cython + name: Unwanted patterns in Cython code + language: pygrep + entry: | + (?x) + # `obj` as opposed to ` obj` + [a-zA-Z0-9*]>[ ] + types: [cython] + - id: pip-to-conda + name: Generate pip dependency from conda + language: python + entry: python scripts/generate_pip_deps_from_conda.py + files: ^(environment.yml|requirements-dev.txt)$ + pass_filenames: false + additional_dependencies: [tomli, pyyaml] + - id: title-capitalization + name: Validate correct capitalization among titles in documentation + entry: python scripts/validate_rst_title_capitalization.py + language: python + types: [rst] + files: ^doc/source/(development|reference)/ + - id: unwanted-patterns-bare-pytest-raises + name: Check for use of bare pytest raises + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises" + types: [python] + files: ^pandas/tests/ + exclude: ^pandas/tests/extension/ + - id: unwanted-patterns-private-function-across-module + name: Check for use of private functions across modules + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" + types: [python] + exclude: ^(asv_bench|pandas/tests|doc)/ + - id: unwanted-patterns-private-import-across-module + name: Check for import of private attributes across modules + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" + types: [python] + exclude: | + (?x) + ^(asv_bench|pandas/tests|doc)/ + |scripts/validate_min_versions_in_sync\.py$ + - id: unwanted-patterns-strings-with-misplaced-whitespace + name: Check for strings with misplaced spaces + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" + types_or: [python, cython] + - id: use-pd_array-in-core + name: Import pandas.array as pd_array in core + language: python + entry: python scripts/use_pd_array_in_core.py + files: ^pandas/core/ + exclude: ^pandas/core/api\.py$ + types: [python] + - id: use-io-common-urlopen + name: Use pandas.io.common.urlopen instead of urllib.request.urlopen + language: python + entry: python scripts/use_io_common_urlopen.py + files: ^pandas/ + exclude: ^pandas/tests/ + types: [python] + - id: no-bool-in-core-generic + name: Use bool_t instead of bool in pandas/core/generic.py + entry: python scripts/no_bool_in_generic.py + language: python + files: ^pandas/core/generic\.py$ + - id: no-return-exception + name: Use raise instead of return for exceptions + language: pygrep + entry: 'return [A-Za-z]+(Error|Exit|Interrupt|Exception|Iteration)' + files: ^pandas/ + types: [python] + exclude: ^pandas/tests/ + - id: pandas-errors-documented + name: Ensure pandas errors are documented in doc/source/reference/testing.rst + entry: python scripts/pandas_errors_documented.py + language: python + files: ^pandas/errors/__init__.py$ + - id: pg8000-not-installed-CI + name: Check for pg8000 not installed on CI for test_pg8000_sqlalchemy_passthrough_error + language: pygrep + entry: 'pg8000' + files: ^ci/deps + types: [yaml] + - id: validate-min-versions-in-sync + name: Check minimum version of dependencies are aligned + entry: python -m scripts.validate_min_versions_in_sync + language: python + files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$ + additional_dependencies: [tomli, pyyaml] + pass_filenames: false + - id: validate-errors-locations + name: Validate errors locations + description: Validate errors are in appropriate locations. + entry: python scripts/validate_exception_location.py + language: python + files: ^pandas/ + exclude: ^(pandas/_libs/|pandas/tests/|pandas/errors/__init__.py$|pandas/_version.py) + types: [python] + - id: future-annotations + name: import annotations from __future__ + entry: 'from __future__ import annotations' + language: pygrep + args: [--negate] + files: ^pandas/ + types: [python] + exclude: | + (?x) + /(__init__\.py)|(api\.py)|(_version\.py)|(testing\.py)|(conftest\.py)$ + |/tests/ + |/_testing/ + - id: autotyping + name: autotyping + entry: python -m scripts.run_autotyping + types_or: [python, pyi] + files: ^pandas + exclude: ^(pandas/tests|pandas/_version.py|pandas/io/clipboard) + language: python + stages: [manual] + additional_dependencies: + - autotyping==23.3.0 + - libcst==0.4.9 + - id: check-test-naming + name: check that test names start with 'test' + entry: python -m scripts.check_test_naming + types: [python] + files: ^pandas/tests + language: python + - id: sort-whatsnew-items + name: sort whatsnew entries alphabetically + entry: python -m scripts.sort_whatsnew_note + types: [rst] + language: python + files: ^doc/source/whatsnew/v + exclude: ^doc/source/whatsnew/v(0|1|2\.0\.0) diff --git a/scripts/check_test_naming.py b/scripts/check_test_naming.py index 33890feb8692d..158cf46f264c2 100644 --- a/scripts/check_test_naming.py +++ b/scripts/check_test_naming.py @@ -118,7 +118,7 @@ def main(content: str, file: str) -> int: assert isinstance(_node, ast.FunctionDef) # help mypy should_continue = False for _file in (Path("pandas") / "tests").rglob("*.py"): - with open(os.path.join(_file)) as fd: + with open(os.path.join(_file), encoding="utf-8") as fd: _content = fd.read() if f"self.{_node.name}" in _content: should_continue = True diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py index 4446ed62f6b8a..0f4c11eb30b07 100755 --- a/scripts/validate_rst_title_capitalization.py +++ b/scripts/validate_rst_title_capitalization.py @@ -226,7 +226,7 @@ def find_titles(rst_file: str) -> Iterable[tuple[str, int]]: The corresponding line number of the heading. """ - with open(rst_file) as fd: + with open(rst_file, encoding="utf-8") as fd: previous_line = "" for i, line in enumerate(fd): line_no_last_elem = line[:-1] From 682fdf836dbb96b55c2ba4f90558eef608c6700b Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sat, 29 Apr 2023 15:25:59 +0530 Subject: [PATCH 03/17] Spaces Are Completed --- .pre-commit-config.yaml | 10 +++++----- b/.pre-commit-config.yaml | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fae32b7b01485..34cd91940f014 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -96,11 +96,11 @@ repos: |^pandas/conftest\.py # keep excluded args: [--disable=all, --enable=redefined-outer-name] stages: [manual] - - id: pylint - alias: unspecified-encoding - name: Using open without explicitly specifying an encoding - args: [--disable=all, --enable=unspecified-encoding] - stages: [manual] + - id: pylint + alias: unspecified-encoding + name: Using open without explicitly specifying an encoding + args: [--disable=all, --enable=unspecified-encoding] + stages: [manual] - repo: https://github.com/PyCQA/isort rev: 5.12.0 hooks: diff --git a/b/.pre-commit-config.yaml b/b/.pre-commit-config.yaml index fae32b7b01485..34cd91940f014 100644 --- a/b/.pre-commit-config.yaml +++ b/b/.pre-commit-config.yaml @@ -96,11 +96,11 @@ repos: |^pandas/conftest\.py # keep excluded args: [--disable=all, --enable=redefined-outer-name] stages: [manual] - - id: pylint - alias: unspecified-encoding - name: Using open without explicitly specifying an encoding - args: [--disable=all, --enable=unspecified-encoding] - stages: [manual] + - id: pylint + alias: unspecified-encoding + name: Using open without explicitly specifying an encoding + args: [--disable=all, --enable=unspecified-encoding] + stages: [manual] - repo: https://github.com/PyCQA/isort rev: 5.12.0 hooks: From a5fbd475b7f5ef2312d5bb51372049eb914e80ff Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sat, 29 Apr 2023 17:31:14 +0530 Subject: [PATCH 04/17] Pre-ccommit manually completed --- asv_bench/benchmarks/io/csv.py | 2 +- doc/make.py | 8 +++--- doc/source/conf.py | 4 +-- pandas/_testing/contexts.py | 2 +- pandas/_version.py | 2 +- pandas/core/series.py | 2 +- pandas/io/clipboard/__init__.py | 4 +-- pandas/tests/frame/methods/test_to_csv.py | 2 +- pandas/tests/io/excel/test_readers.py | 2 +- pandas/tests/io/formats/style/test_html.py | 2 +- pandas/tests/io/formats/test_to_csv.py | 24 ++++++++--------- pandas/tests/io/formats/test_to_latex.py | 2 +- pandas/tests/io/json/test_pandas.py | 2 +- .../tests/io/parser/common/test_chunksize.py | 2 +- .../io/parser/common/test_file_buffer_url.py | 6 ++--- .../tests/io/parser/common/test_iterator.py | 4 +-- pandas/tests/io/parser/test_c_parser_only.py | 6 ++--- pandas/tests/io/parser/test_compression.py | 4 +-- pandas/tests/io/test_common.py | 4 +-- pandas/tests/io/test_compression.py | 4 +-- pandas/tests/io/test_gcs.py | 2 +- pandas/tests/io/test_html.py | 2 +- pandas/tests/io/xml/test_to_xml.py | 6 ++--- pandas/tests/io/xml/test_xml.py | 26 +++++++++---------- pandas/tests/io/xml/test_xml_dtypes.py | 2 +- pandas/tests/series/methods/test_to_csv.py | 4 +-- pandas/tests/util/test_show_versions.py | 4 +-- scripts/generate_pxi.py | 4 +-- scripts/generate_version.py | 2 +- scripts/pandas_errors_documented.py | 2 +- scripts/sort_whatsnew_note.py | 4 +-- .../test_validate_min_versions_in_sync.py | 4 +-- scripts/validate_docstrings.py | 2 +- scripts/validate_exception_location.py | 2 +- scripts/validate_min_versions_in_sync.py | 4 +-- setup.py | 4 +-- web/pandas_web.py | 14 +++++----- 37 files changed, 88 insertions(+), 88 deletions(-) diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index 36301d22db5d3..856327dfe876f 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -444,7 +444,7 @@ class ReadCSVMemoryGrowth(BaseIO): param_names = ["engine"] def setup(self, engine): - with open(self.fname, "w") as f: + with open(self.fname, "w",encoding="utf-8") as f: for i in range(self.num_rows): f.write(f"{i}\n") diff --git a/doc/make.py b/doc/make.py index f5bf170c6274d..78edbc997eb55 100755 --- a/doc/make.py +++ b/doc/make.py @@ -163,12 +163,12 @@ def _get_page_title(self, page): components=(docutils.parsers.rst.Parser,) ) doc = docutils.utils.new_document("", option_parser.get_default_values()) - with open(fname) as f: + with open(fname,encoding="utf-8") as f: data = f.read() parser = docutils.parsers.rst.Parser() # do not generate any warning when parsing the rst - with open(os.devnull, "a") as f: + with open(os.devnull, "a",encoding="utf-8") as f: doc.reporter.stream = f parser.parse(data, doc) @@ -186,7 +186,7 @@ def _add_redirects(self): Create in the build directory an html file with a redirect, for every row in REDIRECTS_FILE. """ - with open(REDIRECTS_FILE) as mapping_fd: + with open(REDIRECTS_FILE,encoding="utf-8") as mapping_fd: reader = csv.reader(mapping_fd) for row in reader: if not row or row[0].strip().startswith("#"): @@ -209,7 +209,7 @@ def _add_redirects(self): # sphinx specific stuff title = "this page" - with open(path, "w") as moved_page_fd: + with open(path, "w",encoding="utf-8") as moved_page_fd: html = f"""\ diff --git a/doc/source/conf.py b/doc/source/conf.py index 0219c0e4f05ba..e5e764f4c7ef4 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -117,9 +117,9 @@ elif single_doc and rel_fname != pattern: exclude_patterns.append(rel_fname) -with open(os.path.join(source_path, "index.rst.template")) as f: +with open(os.path.join(source_path, "index.rst.template"),encoding="utf-8") as f: t = jinja2.Template(f.read()) -with open(os.path.join(source_path, "index.rst"), "w") as f: +with open(os.path.join(source_path, "index.rst"), "w",encoding="utf-8") as f: f.write( t.render( include_api=include_api, diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index ab00c80886794..d36dedc3008ac 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -126,7 +126,7 @@ def ensure_clean( handle_or_str: str | IO = str(path) if return_filelike: kwargs.setdefault("mode", "w+b") - handle_or_str = open(path, **kwargs) + handle_or_str = open(path, **kwargs,encoding="utf-8") try: yield handle_or_str diff --git a/pandas/_version.py b/pandas/_version.py index 6705b8505f7e2..59215ff72e715 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -159,7 +159,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - with open(versionfile_abs) as fobj: + with open(versionfile_abs,encoding="utf-8") as fobj: for line in fobj: if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) diff --git a/pandas/core/series.py b/pandas/core/series.py index 2b71eb4a9480d..d7db059b7cba5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1699,7 +1699,7 @@ def to_string( if hasattr(buf, "write"): buf.write(result) else: - with open(buf, "w") as f: + with open(buf, "w",encoding="utf-8") as f: f.write(result) return None diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index e574ed2c8059a..d29c6c216256c 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -282,11 +282,11 @@ def copy_dev_clipboard(text): stacklevel=find_stack_level(), ) - with open("/dev/clipboard", "w") as fd: + with open("/dev/clipboard", "w",encoding="utf-8") as fd: fd.write(text) def paste_dev_clipboard() -> str: - with open("/dev/clipboard") as fd: + with open("/dev/clipboard",encoding="utf-8") as fd: content = fd.read() return content diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 639c6f9d73511..494bff217b60e 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -626,7 +626,7 @@ def test_to_csv_float32_nanrep(self): with tm.ensure_clean("__tmp_to_csv_float32_nanrep__.csv") as path: df.to_csv(path, na_rep=999) - with open(path) as f: + with open(path,encoding="utf-8") as f: lines = f.readlines() assert lines[1].split(",")[2] == "999" diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 37ecce84e3caa..cf0df8e5c23e5 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1702,7 +1702,7 @@ def test_corrupt_files_closed(self, engine, read_ext): errors = (BadZipFile, xlrd.biffh.XLRDError) with tm.ensure_clean(f"corrupt{read_ext}") as file: - Path(file).write_text("corrupt") + Path(file).write_text("corrupt",encoding="utf-8") with tm.assert_produces_warning(False): try: pd.ExcelFile(file, engine=engine) diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py index 67f7e12fcc3c2..d06e7ef254469 100644 --- a/pandas/tests/io/formats/style/test_html.py +++ b/pandas/tests/io/formats/style/test_html.py @@ -43,7 +43,7 @@ def tpl_table(): def test_html_template_extends_options(): # make sure if templates are edited tests are updated as are setup fixtures # to understand the dependency - with open("pandas/io/formats/templates/html.tpl") as file: + with open("pandas/io/formats/templates/html.tpl",encoding="utf-8") as file: result = file.read() assert "{% include html_style_tpl %}" in result assert "{% include html_table_tpl %}" in result diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 81dc79d3111b8..2c2ed7f8514c7 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -32,7 +32,7 @@ def test_to_csv_with_single_column(self): """ with tm.ensure_clean("test.csv") as path: df1.to_csv(path, header=None, index=None) - with open(path) as f: + with open(path,encoding="utf-8") as f: assert f.read() == expected1 df2 = DataFrame([1, None]) @@ -42,7 +42,7 @@ def test_to_csv_with_single_column(self): """ with tm.ensure_clean("test.csv") as path: df2.to_csv(path, header=None, index=None) - with open(path) as f: + with open(path,encoding="utf-8") as f: assert f.read() == expected2 def test_to_csv_default_encoding(self): @@ -64,7 +64,7 @@ def test_to_csv_quotechar(self): with tm.ensure_clean("test.csv") as path: df.to_csv(path, quoting=1) # 1=QUOTE_ALL - with open(path) as f: + with open(path,encoding="utf-8") as f: assert f.read() == expected expected = """\ @@ -75,7 +75,7 @@ def test_to_csv_quotechar(self): with tm.ensure_clean("test.csv") as path: df.to_csv(path, quoting=1, quotechar="$") - with open(path) as f: + with open(path,encoding="utf-8") as f: assert f.read() == expected with tm.ensure_clean("test.csv") as path: @@ -92,7 +92,7 @@ def test_to_csv_doublequote(self): with tm.ensure_clean("test.csv") as path: df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL - with open(path) as f: + with open(path,encoding="utf-8") as f: assert f.read() == expected with tm.ensure_clean("test.csv") as path: @@ -109,7 +109,7 @@ def test_to_csv_escapechar(self): with tm.ensure_clean("test.csv") as path: # QUOTE_ALL df.to_csv(path, quoting=1, doublequote=False, escapechar="\\") - with open(path) as f: + with open(path,encoding="utf-8") as f: assert f.read() == expected df = DataFrame({"col": ["a,a", ",bb,"]}) @@ -121,7 +121,7 @@ def test_to_csv_escapechar(self): with tm.ensure_clean("test.csv") as path: df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE - with open(path) as f: + with open(path,encoding="utf-8") as f: assert f.read() == expected def test_csv_to_string(self): @@ -401,7 +401,7 @@ def test_to_csv_string_array_ascii(self): """ with tm.ensure_clean("str_test.csv") as path: df.to_csv(path, encoding="ascii") - with open(path) as f: + with open(path,encoding="utf-8") as f: assert f.read() == expected_ascii def test_to_csv_string_array_utf8(self): @@ -415,7 +415,7 @@ def test_to_csv_string_array_utf8(self): """ with tm.ensure_clean("unicode_test.csv") as path: df.to_csv(path, encoding="utf-8") - with open(path) as f: + with open(path,encoding="utf-8") as f: assert f.read() == expected_utf8 def test_to_csv_string_with_lf(self): @@ -521,10 +521,10 @@ def test_to_csv_write_to_open_file(self): z """ with tm.ensure_clean("test.txt") as path: - with open(path, "w") as f: + with open(path, "w",encoding="utf-8") as f: f.write("manual header\n") df.to_csv(f, header=None, index=None) - with open(path) as f: + with open(path,encoding="utf-8") as f: assert f.read() == expected def test_to_csv_write_to_open_file_with_newline_py3(self): @@ -534,7 +534,7 @@ def test_to_csv_write_to_open_file_with_newline_py3(self): expected_rows = ["x", "y", "z"] expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows) with tm.ensure_clean("test.txt") as path: - with open(path, "w", newline="") as f: + with open(path, "w", newline="",encoding="utf-8") as f: f.write("manual header\n") df.to_csv(f, header=None, index=None) diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 64c064172a646..c46aa609922a3 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -34,7 +34,7 @@ class TestToLatex: def test_to_latex_to_file(self, float_frame): with tm.ensure_clean("test.tex") as path: float_frame.to_latex(path) - with open(path) as f: + with open(path,encoding="utf-8") as f: assert float_frame.to_latex() == f.read() def test_to_latex_to_file_utf8_with_encoding(self): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 5fc04509b86b6..377e1cc3d99ba 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1214,7 +1214,7 @@ def test_read_s3_jsonl(self, s3_resource, s3so): def test_read_local_jsonl(self): # GH17200 with tm.ensure_clean("tmp_items.json") as path: - with open(path, "w") as infile: + with open(path, "w",encoding="utf-8") as infile: infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n') result = read_json(path, lines=True) expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py index c8cef56c73902..14327a98ec47e 100644 --- a/pandas/tests/io/parser/common/test_chunksize.py +++ b/pandas/tests/io/parser/common/test_chunksize.py @@ -228,7 +228,7 @@ def test_read_csv_memory_growth_chunksize(all_parsers): parser = all_parsers with tm.ensure_clean() as path: - with open(path, "w") as f: + with open(path, "w",encoding="utf-8") as f: for i in range(1000): f.write(str(i) + "\n") diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index c11a59a8b4660..ba41b46f37099 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -107,7 +107,7 @@ def test_no_permission(all_parsers): # verify that this process cannot open the file (not running as sudo) try: - with open(path): + with open(path,encoding="utf-8"): pass pytest.skip("Running as sudo.") except PermissionError: @@ -285,7 +285,7 @@ def test_file_handles_with_open(all_parsers, csv1): parser = all_parsers for mode in ["r", "rb"]: - with open(csv1, mode) as f: + with open(csv1, mode,encoding="utf-8") as f: parser.read_csv(f) assert not f.closed @@ -392,7 +392,7 @@ def test_context_manageri_user_provided(all_parsers, datapath): # make sure that user-provided handles are not closed parser = all_parsers - with open(datapath("io", "data", "csv", "iris.csv")) as path: + with open(datapath("io", "data", "csv", "iris.csv"),encoding="utf-8") as path: reader = parser.read_csv(path, chunksize=1) assert not reader.handles.handle.closed try: diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py index 939ed0e73a5ee..f517c94998138 100644 --- a/pandas/tests/io/parser/common/test_iterator.py +++ b/pandas/tests/io/parser/common/test_iterator.py @@ -95,10 +95,10 @@ def test_iteration_open_handle(all_parsers): kwargs = {"header": None} with tm.ensure_clean() as path: - with open(path, "w") as f: + with open(path, "w",encoding="utf-8") as f: f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG") - with open(path) as f: + with open(path,encoding="utf-8") as f: for line in f: if "CCC" in line: break diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index c2a65704a845a..d45f47a805019 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -603,7 +603,7 @@ def test_file_handles_mmap(c_parser_only, csv1): # Don't close user provided file handles. parser = c_parser_only - with open(csv1) as f: + with open(csv1,encoding="utf-8") as f: with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m: parser.read_csv(m) assert not m.closed @@ -615,7 +615,7 @@ def test_file_binary_mode(c_parser_only): expected = DataFrame([[1, 2, 3], [4, 5, 6]]) with tm.ensure_clean() as path: - with open(path, "w") as f: + with open(path, "w",encoding="utf-8") as f: f.write("1,2,3\n4,5,6") with open(path, "rb") as f: @@ -627,7 +627,7 @@ def test_unix_style_breaks(c_parser_only): # GH 11020 parser = c_parser_only with tm.ensure_clean() as path: - with open(path, "w", newline="\n") as f: + with open(path, "w", newline="\n",encoding="utf-8") as f: f.write("blah\n\ncol_1,col_2,col_3\n\n") result = parser.read_csv(path, skiprows=2, encoding="utf-8", engine="c") expected = DataFrame(columns=["col_1", "col_2", "col_3"]) diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py index ab00e31bd9b43..7ab9237637f84 100644 --- a/pandas/tests/io/parser/test_compression.py +++ b/pandas/tests/io/parser/test_compression.py @@ -129,7 +129,7 @@ def test_infer_compression(all_parsers, csv1, buffer, ext): kwargs["compression"] = "infer" if buffer: - with open(csv1) as f: + with open(csv1,encoding="utf-8") as f: result = parser.read_csv(f, **kwargs) else: ext = "." + ext if ext else "" @@ -183,7 +183,7 @@ def test_ignore_compression_extension(all_parsers): with tm.ensure_clean("test.csv.zip") as path_zip: # make sure to create un-compressed file with zip extension df.to_csv(path_csv, index=False) - Path(path_zip).write_text(Path(path_csv).read_text()) + Path(path_zip).write_text(Path(path_csv).read_text(encoding="utf-8"),encoding="utf-8") tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index b248c0c460c74..4e273edfeec69 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -411,7 +411,7 @@ def test_constructor_bad_file(self, mmap_file): with pytest.raises(err, match=msg): icom._maybe_memory_map(non_file, True) - with open(mmap_file) as target: + with open(mmap_file,encoding="utf-8") as target: pass msg = "I/O operation on closed file" @@ -419,7 +419,7 @@ def test_constructor_bad_file(self, mmap_file): icom._maybe_memory_map(target, True) def test_next(self, mmap_file): - with open(mmap_file) as target: + with open(mmap_file,encoding="utf-8") as target: lines = target.readlines() with icom.get_handle( diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index fc15ff3488ce9..da159566bd8be 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -299,10 +299,10 @@ def test_ambiguous_archive_zip(): def test_ambiguous_archive_tar(tmp_path): csvAPath = tmp_path / "a.csv" - with open(csvAPath, "w") as a: + with open(csvAPath, "w",encoding="utf-8") as a: a.write("foo,bar\n") csvBPath = tmp_path / "b.csv" - with open(csvBPath, "w") as b: + with open(csvBPath, "w",encoding="utf-8") as b: b.write("foo,bar\n") tarpath = tmp_path / "archive.tar" diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index b65a19d766976..f1fea1732492c 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -193,7 +193,7 @@ class MockGCSFileSystem(AbstractFileSystem): def open(self, path, mode="r", *args): if "w" not in mode: raise FileNotFoundError - return open(os.path.join(tmpdir, "test.parquet"), mode) + return open(os.path.join(tmpdir, "test.parquet"), mode,encoding="utf-8") monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem) df1.to_parquet( diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 4bd4e0cd7146f..bf82c14a88e8e 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -692,7 +692,7 @@ def try_remove_ws(x): @pytest.mark.slow def test_gold_canyon(self, banklist_data): gc = "Gold Canyon" - with open(banklist_data) as f: + with open(banklist_data,encoding="utf-8") as f: raw_text = f.read() assert gc in raw_text diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index 4843f40d6813d..b58064745ee19 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -983,7 +983,7 @@ def test_unknown_parser(): def test_stylesheet_file_like(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") - with open(xsl, mode) as f: + with open(xsl, mode,encoding="utf-8") as f: assert geom_df.to_xml(stylesheet=f) == xsl_expected @@ -995,7 +995,7 @@ def test_stylesheet_io(datapath, mode): # consider using --check-untyped-defs xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked] - with open(xsl_path, mode) as f: + with open(xsl_path, mode,encoding="utf-8") as f: if mode == "rb": xsl_obj = BytesIO(f.read()) else: @@ -1010,7 +1010,7 @@ def test_stylesheet_io(datapath, mode): def test_stylesheet_buffered_reader(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") - with open(xsl, mode) as f: + with open(xsl, mode,encoding="utf-8") as f: xsl_obj = f.read() output = geom_df.to_xml(stylesheet=xsl_obj) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 071bc67d2dad9..2b3afc16b1ecd 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -259,7 +259,7 @@ def parser(request): def read_xml_iterparse(data, **kwargs): with tm.ensure_clean() as path: - with open(path, "w") as f: + with open(path, "w",encoding="utf-8") as f: f.write(data) return read_xml(path, **kwargs) @@ -267,7 +267,7 @@ def read_xml_iterparse(data, **kwargs): def read_xml_iterparse_comp(comp_path, compression_only, **kwargs): with get_handle(comp_path, "r", compression=compression_only) as handles: with tm.ensure_clean() as path: - with open(path, "w") as f: + with open(path, "w",encoding="utf-8") as f: f.write(handles.handle.read()) return read_xml(path, **kwargs) @@ -351,7 +351,7 @@ def test_parser_consistency_url(parser): def test_file_like(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode) as f: + with open(filename, mode,encoding="utf-8") as f: df_file = read_xml(f, parser=parser) df_expected = DataFrame( @@ -369,7 +369,7 @@ def test_file_like(datapath, parser, mode): def test_file_io(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode) as f: + with open(filename, mode,encoding="utf-8") as f: xml_obj = f.read() df_io = read_xml( @@ -392,7 +392,7 @@ def test_file_io(datapath, parser, mode): def test_file_buffered_reader_string(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode) as f: + with open(filename, mode,encoding="utf-8") as f: xml_obj = f.read() df_str = read_xml(xml_obj, parser=parser) @@ -412,7 +412,7 @@ def test_file_buffered_reader_string(datapath, parser, mode): def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode) as f: + with open(filename, mode,encoding="utf-8") as f: next(f) xml_obj = f.read() @@ -1154,7 +1154,7 @@ def test_stylesheet_file_like(datapath, mode): kml = datapath("io", "data", "xml", "cta_rail_lines.kml") xsl = datapath("io", "data", "xml", "flatten_doc.xsl") - with open(xsl, mode) as f: + with open(xsl, mode,encoding="utf-8") as f: df_style = read_xml( kml, xpath=".//k:Placemark", @@ -1174,7 +1174,7 @@ def test_stylesheet_io(datapath, mode): # consider using --check-untyped-defs xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked] - with open(xsl, mode) as f: + with open(xsl, mode,encoding="utf-8") as f: if mode == "rb": xsl_obj = BytesIO(f.read()) else: @@ -1195,7 +1195,7 @@ def test_stylesheet_buffered_reader(datapath, mode): kml = datapath("io", "data", "xml", "cta_rail_lines.kml") xsl = datapath("io", "data", "xml", "flatten_doc.xsl") - with open(xsl, mode) as f: + with open(xsl, mode,encoding="utf-8") as f: xsl_obj = f.read() df_style = read_xml( @@ -1355,7 +1355,7 @@ def test_stylesheet_file_close(datapath, mode): # consider using --check-untyped-defs xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked] - with open(xsl, mode) as f: + with open(xsl, mode,encoding="utf-8") as f: if mode == "rb": xsl_obj = BytesIO(f.read()) else: @@ -1407,7 +1407,7 @@ def test_string_error(parser): def test_file_like_iterparse(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode) as f: + with open(filename, mode,encoding="utf-8") as f: if mode == "r" and parser == "lxml": with pytest.raises( TypeError, match=("reading file objects must return bytes objects") @@ -1444,7 +1444,7 @@ def test_file_io_iterparse(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") funcIO = StringIO if mode == "r" else BytesIO - with open(filename, mode) as f: + with open(filename, mode,encoding="utf-8") as f: with funcIO(f.read()) as b: if mode == "r" and parser == "lxml": with pytest.raises( @@ -1550,7 +1550,7 @@ def test_bad_xml(parser): """ with tm.ensure_clean(filename="bad.xml") as path: - with open(path, "w") as f: + with open(path, "w",encoding="utf-8") as f: f.write(bad_xml) with pytest.raises( diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py index 17d1e7e00653b..ba563d17f277f 100644 --- a/pandas/tests/io/xml/test_xml_dtypes.py +++ b/pandas/tests/io/xml/test_xml_dtypes.py @@ -29,7 +29,7 @@ def iterparse(request): def read_xml_iterparse(data, **kwargs): with tm.ensure_clean() as path: - with open(path, "w") as f: + with open(path, "w",encoding="utf-8") as f: f.write(data) return read_xml(path, **kwargs) diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py index 990c3698a5036..d5384de4079a7 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -52,7 +52,7 @@ def test_from_csv(self, datetime_series, string_series): series_h = self.read_csv(path, header=0) assert series_h.name == "series" - with open(path, "w") as outfile: + with open(path, "w",encoding="utf-8") as outfile: outfile.write("1998-01-01|1.0\n1999-01-01|2.0") series = self.read_csv(path, sep="|", parse_dates=True) @@ -69,7 +69,7 @@ def test_to_csv(self, datetime_series): with tm.ensure_clean() as path: datetime_series.to_csv(path, header=False) - with open(path, newline=None) as f: + with open(path, newline=None,encoding="utf-8") as f: lines = f.readlines() assert lines[1] != "\n" diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py index 714588d179aef..856aa0062e43d 100644 --- a/pandas/tests/util/test_show_versions.py +++ b/pandas/tests/util/test_show_versions.py @@ -16,7 +16,7 @@ def test_show_versions(tmpdir): pd.show_versions(as_json=as_json) - with open(as_json) as fd: + with open(as_json,encoding="utf-8") as fd: # check if file output is valid JSON, will raise an exception if not result = json.load(fd) @@ -75,7 +75,7 @@ def test_json_output_match(capsys, tmpdir): out_path = os.path.join(tmpdir, "test_json.json") pd.show_versions(as_json=out_path) - with open(out_path) as out_fd: + with open(out_path,encoding="utf-8") as out_fd: result_file = out_fd.read() assert result_console == result_file diff --git a/scripts/generate_pxi.py b/scripts/generate_pxi.py index 3462b97aefcbf..586b2d4fe3e35 100644 --- a/scripts/generate_pxi.py +++ b/scripts/generate_pxi.py @@ -5,11 +5,11 @@ def process_tempita(pxifile, outfile): - with open(pxifile) as f: + with open(pxifile,encoding="utf-8") as f: tmpl = f.read() pyxcontent = Tempita.sub(tmpl) - with open(outfile, "w") as f: + with open(outfile, "w",encoding="utf-8") as f: f.write(pyxcontent) diff --git a/scripts/generate_version.py b/scripts/generate_version.py index fbc78ab12429a..3b778567e2335 100644 --- a/scripts/generate_version.py +++ b/scripts/generate_version.py @@ -8,7 +8,7 @@ def write_version_info(path): if os.environ.get("MESON_DIST_ROOT"): # raise ValueError("dist root is", os.environ.get("MESON_DIST_ROOT")) path = os.path.join(os.environ.get("MESON_DIST_ROOT"), path) - with open(path, "w") as file: + with open(path, "w",encoding="utf-8") as file: file.write(f'__version__="{versioneer.get_version()}"\n') file.write( f'__git_version__="{versioneer.get_versions()["full-revisionid"]}"\n' diff --git a/scripts/pandas_errors_documented.py b/scripts/pandas_errors_documented.py index 52c1e2008b8a0..f23d9bc979334 100644 --- a/scripts/pandas_errors_documented.py +++ b/scripts/pandas_errors_documented.py @@ -34,7 +34,7 @@ def main(argv: Sequence[str] | None = None) -> None: args = parser.parse_args(argv) with open(args.path, encoding="utf-8") as f: file_errors = get_defined_errors(f.read()) - with open(API_PATH) as f: + with open(API_PATH,encoding="utf-8") as f: doc_errors = { line.split(".")[1].strip() for line in f.readlines() if "errors" in line } diff --git a/scripts/sort_whatsnew_note.py b/scripts/sort_whatsnew_note.py index ae1d3346a5827..b7a427329a2df 100644 --- a/scripts/sort_whatsnew_note.py +++ b/scripts/sort_whatsnew_note.py @@ -63,12 +63,12 @@ def main(argv: Sequence[str] | None = None) -> int: args = parser.parse_args(argv) ret = 0 for path in args.paths: - with open(path) as fd: + with open(path,encoding="utf-8") as fd: content = fd.read() new_content = sort_whatsnew_note(content) if content != new_content: ret |= 1 - with open(path, "w") as fd: + with open(path, "w",encoding="utf-8") as fd: fd.write(new_content) return ret diff --git a/scripts/tests/test_validate_min_versions_in_sync.py b/scripts/tests/test_validate_min_versions_in_sync.py index 13e8965bb7591..d454bf7063c3c 100644 --- a/scripts/tests/test_validate_min_versions_in_sync.py +++ b/scripts/tests/test_validate_min_versions_in_sync.py @@ -49,13 +49,13 @@ def test_pin_min_versions_to_yaml_file(src_toml, src_yaml, expected_yaml): with open(src_toml, "rb") as toml_f: toml_map = tomllib.load(toml_f) - with open(src_yaml) as yaml_f: + with open(src_yaml,encoding="utf-8") as yaml_f: yaml_file_data = yaml_f.read() yaml_file = yaml.safe_load(yaml_file_data) yaml_dependencies = yaml_file["dependencies"] yaml_map = get_yaml_map_from(yaml_dependencies) toml_map = get_toml_map_from(toml_map) result_yaml_file = pin_min_versions_to_yaml_file(yaml_map, toml_map, yaml_file_data) - with open(expected_yaml) as yaml_f: + with open(expected_yaml,encoding="utf-8") as yaml_f: dummy_yaml_expected_file_1 = yaml_f.read() assert result_yaml_file == dummy_yaml_expected_file_1 diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 4c133483f571f..6b43506bd03f1 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -367,7 +367,7 @@ def get_all_api_items(): base_path = pathlib.Path(__file__).parent.parent api_doc_fnames = pathlib.Path(base_path, "doc", "source", "reference") for api_doc_fname in api_doc_fnames.glob("*.rst"): - with open(api_doc_fname) as f: + with open(api_doc_fname,encoding="utf-8") as f: yield from get_api_items(f) diff --git a/scripts/validate_exception_location.py b/scripts/validate_exception_location.py index 7af5e749b4b96..82154e82786b9 100644 --- a/scripts/validate_exception_location.py +++ b/scripts/validate_exception_location.py @@ -36,7 +36,7 @@ def get_warnings_and_exceptions_from_api_path() -> set[str]: - with open(API_PATH) as f: + with open(API_PATH,encoding="utf-8") as f: doc_errors = { line.split(".")[1].strip() for line in f.readlines() if "errors" in line } diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py index 9a6d97a222000..f4d51142876b9 100755 --- a/scripts/validate_min_versions_in_sync.py +++ b/scripts/validate_min_versions_in_sync.py @@ -63,7 +63,7 @@ def pin_min_versions_to_ci_deps() -> int: toml_dependencies = tomllib.load(toml_f) ret = 0 for curr_file in all_yaml_files: - with open(curr_file) as yaml_f: + with open(curr_file,encoding="utf-8") as yaml_f: yaml_start_data = yaml_f.read() yaml_file = yaml.safe_load(yaml_start_data) yaml_dependencies = yaml_file["dependencies"] @@ -73,7 +73,7 @@ def pin_min_versions_to_ci_deps() -> int: yaml_map, toml_map, yaml_start_data ) if yaml_result_data != yaml_start_data: - with open(curr_file, "w") as f: + with open(curr_file, "w",encoding="utf-8") as f: f.write(yaml_result_data) ret |= 1 return ret diff --git a/setup.py b/setup.py index 49f6557e2e250..8fcd4804cf6e6 100755 --- a/setup.py +++ b/setup.py @@ -88,11 +88,11 @@ def render_templates(cls, pxifiles): # if .pxi.in is not updated, no need to output .pxi continue - with open(pxifile) as f: + with open(pxifile,encoding="utf-8") as f: tmpl = f.read() pyxcontent = Tempita.sub(tmpl) - with open(outfile, "w") as f: + with open(outfile, "w",encoding="utf-8") as f: f.write(pyxcontent) def build_extensions(self): diff --git a/web/pandas_web.py b/web/pandas_web.py index 5e902f1b1919b..75dde11c25138 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -110,7 +110,7 @@ def blog_add_posts(context): md = markdown.Markdown( extensions=context["main"]["markdown_extensions"] ) - with open(os.path.join(posts_path, fname)) as f: + with open(os.path.join(posts_path, fname),encoding="utf-8") as f: html = md.convert(f.read()) title = md.Meta["title"][0] summary = re.sub(tag_expr, "", html) @@ -197,7 +197,7 @@ def maintainers_add_info(context): # save the data fetched from github to use it in case we exceed # git github api quota in the future - with open(pathlib.Path(context["target_path"]) / "maintainers.json", "w") as f: + with open(pathlib.Path(context["target_path"]) / "maintainers.json", "w",encoding="utf-8") as f: json.dump(maintainers_info, f) return context @@ -220,7 +220,7 @@ def home_add_releases(context): resp.raise_for_status() releases = resp.json() - with open(pathlib.Path(context["target_path"]) / "releases.json", "w") as f: + with open(pathlib.Path(context["target_path"]) / "releases.json", "w",encoding="utf-8") as f: json.dump(releases, f, default=datetime.datetime.isoformat) for release in releases: @@ -304,7 +304,7 @@ def roadmap_pdeps(context): resp.raise_for_status() pdeps = resp.json() - with open(pathlib.Path(context["target_path"]) / "pdeps.json", "w") as f: + with open(pathlib.Path(context["target_path"]) / "pdeps.json", "w",encoding="utf-8") as f: json.dump(pdeps, f) for pdep in sorted(pdeps["items"], key=operator.itemgetter("title")): @@ -346,7 +346,7 @@ def get_context(config_fname: str, **kwargs): Load the config yaml as the base context, and enrich it with the information added by the context preprocessors defined in the file. """ - with open(config_fname) as f: + with open(config_fname,encoding="utf-8") as f: context = yaml.safe_load(f) context["source_path"] = os.path.dirname(config_fname) @@ -418,7 +418,7 @@ def main( extension = os.path.splitext(fname)[-1] if extension in (".html", ".md"): - with open(os.path.join(source_path, fname)) as f: + with open(os.path.join(source_path, fname),encoding="utf-8") as f: content = f.read() if extension == ".md": body = markdown.markdown( @@ -431,7 +431,7 @@ def main( context["base_url"] = "".join(["../"] * os.path.normpath(fname).count("/")) content = jinja_env.from_string(content).render(**context) fname_html = os.path.splitext(fname)[0] + ".html" - with open(os.path.join(target_path, fname_html), "w") as f: + with open(os.path.join(target_path, fname_html), "w",encoding="utf-8") as f: f.write(content) else: shutil.copy( From e5282921d570dfa36d0431615d69e158aeeb1d72 Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sat, 29 Apr 2023 17:37:15 +0530 Subject: [PATCH 05/17] b.pre-commit removed --- b/.pre-commit-config.yaml | 432 -------------------------------------- 1 file changed, 432 deletions(-) delete mode 100644 b/.pre-commit-config.yaml diff --git a/b/.pre-commit-config.yaml b/b/.pre-commit-config.yaml deleted file mode 100644 index 34cd91940f014..0000000000000 --- a/b/.pre-commit-config.yaml +++ /dev/null @@ -1,432 +0,0 @@ -minimum_pre_commit_version: 2.15.0 -exclude: ^LICENSES/|\.(html|csv|svg)$ -# reserve "manual" for relatively slow hooks which we still want to run in CI -default_stages: [ - commit, - merge-commit, - push, - prepare-commit-msg, - commit-msg, - post-checkout, - post-commit, - post-merge, - post-rewrite -] -ci: - autofix_prs: false -repos: -- repo: local - hooks: - # NOTE: we make `black` a local hook because if it's installed from - # PyPI (rather than from source) then it'll run twice as fast thanks to mypyc - - id: black - name: black - description: "Black: The uncompromising Python code formatter" - entry: black - language: python - require_serial: true - types_or: [python, pyi] - additional_dependencies: [black==23.1.0] -- repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.259 - hooks: - - id: ruff - args: [--exit-non-zero-on-fix] -- repo: https://github.com/jendrikseipp/vulture - rev: 'v2.7' - hooks: - - id: vulture - entry: python scripts/run_vulture.py - pass_filenames: true - require_serial: false -- repo: https://github.com/codespell-project/codespell - rev: v2.2.2 - hooks: - - id: codespell - types_or: [python, rst, markdown, cython, c] - additional_dependencies: [tomli] -- repo: https://github.com/MarcoGorelli/cython-lint - rev: v0.12.5 - hooks: - - id: cython-lint - - id: double-quote-cython-strings -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - - id: debug-statements - - id: end-of-file-fixer - exclude: \.txt$ - stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, - post-checkout, post-commit, post-merge, post-rewrite] - - id: trailing-whitespace - stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, - post-checkout, post-commit, post-merge, post-rewrite] -- repo: https://github.com/cpplint/cpplint - rev: 1.6.1 - hooks: - - id: cpplint - # We don't lint all C files because we don't want to lint any that are built - # from Cython files nor do we want to lint C files that we didn't modify for - # this particular codebase (e.g. src/headers, src/klib). However, - # we can lint all header files since they aren't "generated" like C files are. - exclude: ^pandas/_libs/src/(klib|headers)/ - args: [ - --quiet, - '--extensions=c,h', - '--headers=h', - --recursive, - --linelength=88, - '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' - ] -- repo: https://github.com/pycqa/pylint - rev: v2.16.2 - hooks: - - id: pylint - stages: [manual] - - id: pylint - alias: redefined-outer-name - name: Redefining name from outer scope - files: ^pandas/ - exclude: | - (?x) - ^pandas/tests # keep excluded - |/_testing/ # keep excluded - |^pandas/util/_test_decorators\.py # keep excluded - |^pandas/_version\.py # keep excluded - |^pandas/conftest\.py # keep excluded - args: [--disable=all, --enable=redefined-outer-name] - stages: [manual] - - id: pylint - alias: unspecified-encoding - name: Using open without explicitly specifying an encoding - args: [--disable=all, --enable=unspecified-encoding] - stages: [manual] -- repo: https://github.com/PyCQA/isort - rev: 5.12.0 - hooks: - - id: isort -- repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 - hooks: - - id: pyupgrade - args: [--py38-plus] -- repo: https://github.com/pre-commit/pygrep-hooks - rev: v1.10.0 - hooks: - - id: rst-backticks - - id: rst-directive-colons - types: [text] # overwrite types: [rst] - types_or: [python, rst] - - id: rst-inline-touching-normal - types: [text] # overwrite types: [rst] - types_or: [python, rst] -- repo: https://github.com/sphinx-contrib/sphinx-lint - rev: v0.6.7 - hooks: - - id: sphinx-lint -- repo: local - hooks: - - id: pyright - # note: assumes python env is setup and activated - name: pyright - entry: pyright - language: node - pass_filenames: false - types: [python] - stages: [manual] - additional_dependencies: &pyright_dependencies - - pyright@1.1.292 - - id: pyright_reportGeneralTypeIssues - # note: assumes python env is setup and activated - name: pyright reportGeneralTypeIssues - entry: pyright --skipunannotated -p pyright_reportGeneralTypeIssues.json --level warning - language: node - pass_filenames: false - types: [python] - stages: [manual] - additional_dependencies: *pyright_dependencies - - id: mypy - # note: assumes python env is setup and activated - name: mypy - entry: mypy - language: system - pass_filenames: false - types: [python] - stages: [manual] - - id: stubtest - # note: assumes python env is setup and activated - # note: requires pandas dev to be installed - name: mypy (stubtest) - entry: python - language: system - pass_filenames: false - types: [pyi] - args: [scripts/run_stubtest.py] - stages: [manual] - - id: inconsistent-namespace-usage - name: 'Check for inconsistent use of pandas namespace' - entry: python scripts/check_for_inconsistent_pandas_namespace.py - exclude: ^pandas/core/interchange/ - language: python - types: [python] - - id: no-os-remove - name: Check code for instances of os.remove - entry: os\.remove - language: pygrep - types: [python] - files: ^pandas/tests/ - exclude: | - (?x)^ - pandas/tests/io/pytables/test_store\.py$ - - id: unwanted-patterns - name: Unwanted patterns - language: pygrep - entry: | - (?x) - # outdated annotation syntax, missing error codes - \#\ type:\ (?!ignore) - |\#\ type:\s?ignore(?!\[) - - # foo._class__ instead of type(foo) - |\.__class__ - - # np.bool/np.object instead of np.bool_/np.object_ - |np\.bool[^_8`] - |np\.object[^_8`] - - # imports from collections.abc instead of `from collections import abc` - |from\ collections\.abc\ import - - # Numpy - |from\ numpy\ import\ random - |from\ numpy\.random\ import - - # Incorrect code-block / IPython directives - |\.\.\ code-block\ :: - |\.\.\ ipython\ :: - # directive should not have a space before :: - |\.\.\ \w+\ :: - - # Check for deprecated messages without sphinx directive - |(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.) - - # {foo!r} instead of {repr(foo)} - |!r} - - # builtin filter function - |(?obj`, not ` obj` - language: pygrep - entry: '[a-zA-Z0-9*]> ' - files: (\.pyx|\.pxi.in)$ - - id: incorrect-backticks - name: Check for backticks incorrectly rendering because of missing spaces - language: pygrep - entry: '[a-zA-Z0-9]\`\`?[a-zA-Z0-9]' - types: [rst] - files: ^doc/source/ - - id: seed-check-asv - name: Check for unnecessary random seeds in asv benchmarks - language: pygrep - entry: 'np\.random\.seed' - files: ^asv_bench/benchmarks - exclude: ^asv_bench/benchmarks/pandas_vb_common\.py - - id: np-testing-array-equal - name: Check for usage of numpy testing or array_equal - language: pygrep - entry: '(numpy|np)(\.testing|\.array_equal)' - files: ^pandas/tests/ - types: [python] - - id: invalid-ea-testing - name: Check for invalid EA testing - language: pygrep - entry: 'tm\.assert_(series|frame)_equal' - files: ^pandas/tests/extension/base - types: [python] - exclude: ^pandas/tests/extension/base/base\.py - - id: unwanted-patterns-in-tests - name: Unwanted patterns in tests - language: pygrep - entry: | - (?x) - # pytest.xfail instead of pytest.mark.xfail - pytest\.xfail - - # imports from pandas._testing instead of `import pandas._testing as tm` - |from\ pandas\._testing\ import - |from\ pandas\ import\ _testing\ as\ tm - - # No direct imports from conftest - |conftest\ import - |import\ conftest - - # pandas.testing instead of tm - |pd\.testing\. - - # pd.api.types instead of from pandas.api.types import ... - |(pd|pandas)\.api\.types\. - - # np.testing, np.array_equal - |(numpy|np)(\.testing|\.array_equal) - - # unittest.mock (use pytest builtin monkeypatch fixture instead) - |(unittest(\.| import )mock|mock\.Mock\(\)|mock\.patch) - - # pytest raises without context - |\s\ pytest.raises - - # pytest.warns (use tm.assert_produces_warning instead) - |pytest\.warns - files: ^pandas/tests/ - types_or: [python, cython, rst] - - id: unwanted-patterns-in-ea-tests - name: Unwanted patterns in EA tests - language: pygrep - entry: | - (?x) - tm.assert_(series|frame)_equal - files: ^pandas/tests/extension/base/ - exclude: ^pandas/tests/extension/base/base\.py$ - types_or: [python, cython, rst] - - id: unwanted-patterns-in-cython - name: Unwanted patterns in Cython code - language: pygrep - entry: | - (?x) - # `obj` as opposed to ` obj` - [a-zA-Z0-9*]>[ ] - types: [cython] - - id: pip-to-conda - name: Generate pip dependency from conda - language: python - entry: python scripts/generate_pip_deps_from_conda.py - files: ^(environment.yml|requirements-dev.txt)$ - pass_filenames: false - additional_dependencies: [tomli, pyyaml] - - id: title-capitalization - name: Validate correct capitalization among titles in documentation - entry: python scripts/validate_rst_title_capitalization.py - language: python - types: [rst] - files: ^doc/source/(development|reference)/ - - id: unwanted-patterns-bare-pytest-raises - name: Check for use of bare pytest raises - language: python - entry: python scripts/validate_unwanted_patterns.py --validation-type="bare_pytest_raises" - types: [python] - files: ^pandas/tests/ - exclude: ^pandas/tests/extension/ - - id: unwanted-patterns-private-function-across-module - name: Check for use of private functions across modules - language: python - entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" - types: [python] - exclude: ^(asv_bench|pandas/tests|doc)/ - - id: unwanted-patterns-private-import-across-module - name: Check for import of private attributes across modules - language: python - entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" - types: [python] - exclude: | - (?x) - ^(asv_bench|pandas/tests|doc)/ - |scripts/validate_min_versions_in_sync\.py$ - - id: unwanted-patterns-strings-with-misplaced-whitespace - name: Check for strings with misplaced spaces - language: python - entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace" - types_or: [python, cython] - - id: use-pd_array-in-core - name: Import pandas.array as pd_array in core - language: python - entry: python scripts/use_pd_array_in_core.py - files: ^pandas/core/ - exclude: ^pandas/core/api\.py$ - types: [python] - - id: use-io-common-urlopen - name: Use pandas.io.common.urlopen instead of urllib.request.urlopen - language: python - entry: python scripts/use_io_common_urlopen.py - files: ^pandas/ - exclude: ^pandas/tests/ - types: [python] - - id: no-bool-in-core-generic - name: Use bool_t instead of bool in pandas/core/generic.py - entry: python scripts/no_bool_in_generic.py - language: python - files: ^pandas/core/generic\.py$ - - id: no-return-exception - name: Use raise instead of return for exceptions - language: pygrep - entry: 'return [A-Za-z]+(Error|Exit|Interrupt|Exception|Iteration)' - files: ^pandas/ - types: [python] - exclude: ^pandas/tests/ - - id: pandas-errors-documented - name: Ensure pandas errors are documented in doc/source/reference/testing.rst - entry: python scripts/pandas_errors_documented.py - language: python - files: ^pandas/errors/__init__.py$ - - id: pg8000-not-installed-CI - name: Check for pg8000 not installed on CI for test_pg8000_sqlalchemy_passthrough_error - language: pygrep - entry: 'pg8000' - files: ^ci/deps - types: [yaml] - - id: validate-min-versions-in-sync - name: Check minimum version of dependencies are aligned - entry: python -m scripts.validate_min_versions_in_sync - language: python - files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$ - additional_dependencies: [tomli, pyyaml] - pass_filenames: false - - id: validate-errors-locations - name: Validate errors locations - description: Validate errors are in appropriate locations. - entry: python scripts/validate_exception_location.py - language: python - files: ^pandas/ - exclude: ^(pandas/_libs/|pandas/tests/|pandas/errors/__init__.py$|pandas/_version.py) - types: [python] - - id: future-annotations - name: import annotations from __future__ - entry: 'from __future__ import annotations' - language: pygrep - args: [--negate] - files: ^pandas/ - types: [python] - exclude: | - (?x) - /(__init__\.py)|(api\.py)|(_version\.py)|(testing\.py)|(conftest\.py)$ - |/tests/ - |/_testing/ - - id: autotyping - name: autotyping - entry: python -m scripts.run_autotyping - types_or: [python, pyi] - files: ^pandas - exclude: ^(pandas/tests|pandas/_version.py|pandas/io/clipboard) - language: python - stages: [manual] - additional_dependencies: - - autotyping==23.3.0 - - libcst==0.4.9 - - id: check-test-naming - name: check that test names start with 'test' - entry: python -m scripts.check_test_naming - types: [python] - files: ^pandas/tests - language: python - - id: sort-whatsnew-items - name: sort whatsnew entries alphabetically - entry: python -m scripts.sort_whatsnew_note - types: [rst] - language: python - files: ^doc/source/whatsnew/v - exclude: ^doc/source/whatsnew/v(0|1|2\.0\.0) From 3d72dc3c32b9b97e9e6d80efef8dd207b0e9df16 Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sat, 29 Apr 2023 17:47:06 +0530 Subject: [PATCH 06/17] Final Commit --- asv_bench/benchmarks/io/csv.py | 2 +- doc/make.py | 8 +++--- doc/source/conf.py | 4 +-- pandas/_testing/contexts.py | 2 +- pandas/_version.py | 2 +- pandas/core/series.py | 2 +- pandas/io/clipboard/__init__.py | 4 +-- pandas/tests/frame/methods/test_to_csv.py | 2 +- pandas/tests/io/excel/test_readers.py | 2 +- pandas/tests/io/formats/style/test_html.py | 2 +- pandas/tests/io/formats/test_to_csv.py | 24 ++++++++--------- pandas/tests/io/formats/test_to_latex.py | 2 +- pandas/tests/io/json/test_pandas.py | 2 +- .../tests/io/parser/common/test_chunksize.py | 2 +- .../io/parser/common/test_file_buffer_url.py | 6 ++--- .../tests/io/parser/common/test_iterator.py | 4 +-- pandas/tests/io/parser/test_c_parser_only.py | 6 ++--- pandas/tests/io/parser/test_compression.py | 6 +++-- pandas/tests/io/test_common.py | 4 +-- pandas/tests/io/test_compression.py | 4 +-- pandas/tests/io/test_gcs.py | 2 +- pandas/tests/io/test_html.py | 2 +- pandas/tests/io/xml/test_to_xml.py | 6 ++--- pandas/tests/io/xml/test_xml.py | 26 +++++++++---------- pandas/tests/io/xml/test_xml_dtypes.py | 2 +- pandas/tests/series/methods/test_to_csv.py | 4 +-- pandas/tests/util/test_show_versions.py | 4 +-- scripts/generate_pxi.py | 4 +-- scripts/generate_version.py | 2 +- scripts/pandas_errors_documented.py | 2 +- scripts/sort_whatsnew_note.py | 4 +-- .../test_validate_min_versions_in_sync.py | 4 +-- scripts/validate_docstrings.py | 2 +- scripts/validate_exception_location.py | 2 +- scripts/validate_min_versions_in_sync.py | 4 +-- setup.py | 4 +-- web/pandas_web.py | 26 ++++++++++++++----- 37 files changed, 102 insertions(+), 88 deletions(-) diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index 856327dfe876f..07d536d827959 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -444,7 +444,7 @@ class ReadCSVMemoryGrowth(BaseIO): param_names = ["engine"] def setup(self, engine): - with open(self.fname, "w",encoding="utf-8") as f: + with open(self.fname, "w", encoding="utf-8") as f: for i in range(self.num_rows): f.write(f"{i}\n") diff --git a/doc/make.py b/doc/make.py index 78edbc997eb55..ed13ed87bcdbb 100755 --- a/doc/make.py +++ b/doc/make.py @@ -163,12 +163,12 @@ def _get_page_title(self, page): components=(docutils.parsers.rst.Parser,) ) doc = docutils.utils.new_document("", option_parser.get_default_values()) - with open(fname,encoding="utf-8") as f: + with open(fname, encoding="utf-8") as f: data = f.read() parser = docutils.parsers.rst.Parser() # do not generate any warning when parsing the rst - with open(os.devnull, "a",encoding="utf-8") as f: + with open(os.devnull, "a", encoding="utf-8") as f: doc.reporter.stream = f parser.parse(data, doc) @@ -186,7 +186,7 @@ def _add_redirects(self): Create in the build directory an html file with a redirect, for every row in REDIRECTS_FILE. """ - with open(REDIRECTS_FILE,encoding="utf-8") as mapping_fd: + with open(REDIRECTS_FILE, encoding="utf-8") as mapping_fd: reader = csv.reader(mapping_fd) for row in reader: if not row or row[0].strip().startswith("#"): @@ -209,7 +209,7 @@ def _add_redirects(self): # sphinx specific stuff title = "this page" - with open(path, "w",encoding="utf-8") as moved_page_fd: + with open(path, "w", encoding="utf-8") as moved_page_fd: html = f"""\ diff --git a/doc/source/conf.py b/doc/source/conf.py index e5e764f4c7ef4..6c0b1c21b8778 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -117,9 +117,9 @@ elif single_doc and rel_fname != pattern: exclude_patterns.append(rel_fname) -with open(os.path.join(source_path, "index.rst.template"),encoding="utf-8") as f: +with open(os.path.join(source_path, "index.rst.template"), encoding="utf-8") as f: t = jinja2.Template(f.read()) -with open(os.path.join(source_path, "index.rst"), "w",encoding="utf-8") as f: +with open(os.path.join(source_path, "index.rst"), "w", encoding="utf-8") as f: f.write( t.render( include_api=include_api, diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index d36dedc3008ac..f11041d477701 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -126,7 +126,7 @@ def ensure_clean( handle_or_str: str | IO = str(path) if return_filelike: kwargs.setdefault("mode", "w+b") - handle_or_str = open(path, **kwargs,encoding="utf-8") + handle_or_str = open(path, **kwargs, encoding="utf-8") try: yield handle_or_str diff --git a/pandas/_version.py b/pandas/_version.py index 59215ff72e715..8c655648377c7 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -159,7 +159,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - with open(versionfile_abs,encoding="utf-8") as fobj: + with open(versionfile_abs, encoding="utf-8") as fobj: for line in fobj: if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) diff --git a/pandas/core/series.py b/pandas/core/series.py index d7db059b7cba5..7053b88e0da2b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1699,7 +1699,7 @@ def to_string( if hasattr(buf, "write"): buf.write(result) else: - with open(buf, "w",encoding="utf-8") as f: + with open(buf, "w", encoding="utf-8") as f: f.write(result) return None diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index d29c6c216256c..c07f51d875d4d 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -282,11 +282,11 @@ def copy_dev_clipboard(text): stacklevel=find_stack_level(), ) - with open("/dev/clipboard", "w",encoding="utf-8") as fd: + with open("/dev/clipboard", "w", encoding="utf-8") as fd: fd.write(text) def paste_dev_clipboard() -> str: - with open("/dev/clipboard",encoding="utf-8") as fd: + with open("/dev/clipboard", encoding="utf-8") as fd: content = fd.read() return content diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 494bff217b60e..b44b05f9f8153 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -626,7 +626,7 @@ def test_to_csv_float32_nanrep(self): with tm.ensure_clean("__tmp_to_csv_float32_nanrep__.csv") as path: df.to_csv(path, na_rep=999) - with open(path,encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: lines = f.readlines() assert lines[1].split(",")[2] == "999" diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index cf0df8e5c23e5..b8fdbce0f5c23 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1702,7 +1702,7 @@ def test_corrupt_files_closed(self, engine, read_ext): errors = (BadZipFile, xlrd.biffh.XLRDError) with tm.ensure_clean(f"corrupt{read_ext}") as file: - Path(file).write_text("corrupt",encoding="utf-8") + Path(file).write_text("corrupt", encoding="utf-8") with tm.assert_produces_warning(False): try: pd.ExcelFile(file, engine=engine) diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py index d06e7ef254469..7b0617fbd829f 100644 --- a/pandas/tests/io/formats/style/test_html.py +++ b/pandas/tests/io/formats/style/test_html.py @@ -43,7 +43,7 @@ def tpl_table(): def test_html_template_extends_options(): # make sure if templates are edited tests are updated as are setup fixtures # to understand the dependency - with open("pandas/io/formats/templates/html.tpl",encoding="utf-8") as file: + with open("pandas/io/formats/templates/html.tpl", encoding="utf-8") as file: result = file.read() assert "{% include html_style_tpl %}" in result assert "{% include html_table_tpl %}" in result diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 2c2ed7f8514c7..a208daaf9f77b 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -32,7 +32,7 @@ def test_to_csv_with_single_column(self): """ with tm.ensure_clean("test.csv") as path: df1.to_csv(path, header=None, index=None) - with open(path,encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: assert f.read() == expected1 df2 = DataFrame([1, None]) @@ -42,7 +42,7 @@ def test_to_csv_with_single_column(self): """ with tm.ensure_clean("test.csv") as path: df2.to_csv(path, header=None, index=None) - with open(path,encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: assert f.read() == expected2 def test_to_csv_default_encoding(self): @@ -64,7 +64,7 @@ def test_to_csv_quotechar(self): with tm.ensure_clean("test.csv") as path: df.to_csv(path, quoting=1) # 1=QUOTE_ALL - with open(path,encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: assert f.read() == expected expected = """\ @@ -75,7 +75,7 @@ def test_to_csv_quotechar(self): with tm.ensure_clean("test.csv") as path: df.to_csv(path, quoting=1, quotechar="$") - with open(path,encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: assert f.read() == expected with tm.ensure_clean("test.csv") as path: @@ -92,7 +92,7 @@ def test_to_csv_doublequote(self): with tm.ensure_clean("test.csv") as path: df.to_csv(path, quoting=1, doublequote=True) # QUOTE_ALL - with open(path,encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: assert f.read() == expected with tm.ensure_clean("test.csv") as path: @@ -109,7 +109,7 @@ def test_to_csv_escapechar(self): with tm.ensure_clean("test.csv") as path: # QUOTE_ALL df.to_csv(path, quoting=1, doublequote=False, escapechar="\\") - with open(path,encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: assert f.read() == expected df = DataFrame({"col": ["a,a", ",bb,"]}) @@ -121,7 +121,7 @@ def test_to_csv_escapechar(self): with tm.ensure_clean("test.csv") as path: df.to_csv(path, quoting=3, escapechar="\\") # QUOTE_NONE - with open(path,encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: assert f.read() == expected def test_csv_to_string(self): @@ -401,7 +401,7 @@ def test_to_csv_string_array_ascii(self): """ with tm.ensure_clean("str_test.csv") as path: df.to_csv(path, encoding="ascii") - with open(path,encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: assert f.read() == expected_ascii def test_to_csv_string_array_utf8(self): @@ -415,7 +415,7 @@ def test_to_csv_string_array_utf8(self): """ with tm.ensure_clean("unicode_test.csv") as path: df.to_csv(path, encoding="utf-8") - with open(path,encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: assert f.read() == expected_utf8 def test_to_csv_string_with_lf(self): @@ -521,10 +521,10 @@ def test_to_csv_write_to_open_file(self): z """ with tm.ensure_clean("test.txt") as path: - with open(path, "w",encoding="utf-8") as f: + with open(path, "w", encoding="utf-8") as f: f.write("manual header\n") df.to_csv(f, header=None, index=None) - with open(path,encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: assert f.read() == expected def test_to_csv_write_to_open_file_with_newline_py3(self): @@ -534,7 +534,7 @@ def test_to_csv_write_to_open_file_with_newline_py3(self): expected_rows = ["x", "y", "z"] expected = "manual header\n" + tm.convert_rows_list_to_csv_str(expected_rows) with tm.ensure_clean("test.txt") as path: - with open(path, "w", newline="",encoding="utf-8") as f: + with open(path, "w", newline="", encoding="utf-8") as f: f.write("manual header\n") df.to_csv(f, header=None, index=None) diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index c46aa609922a3..f127dc1dfc74b 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -34,7 +34,7 @@ class TestToLatex: def test_to_latex_to_file(self, float_frame): with tm.ensure_clean("test.tex") as path: float_frame.to_latex(path) - with open(path,encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: assert float_frame.to_latex() == f.read() def test_to_latex_to_file_utf8_with_encoding(self): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 377e1cc3d99ba..788a6e97e3d0f 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1214,7 +1214,7 @@ def test_read_s3_jsonl(self, s3_resource, s3so): def test_read_local_jsonl(self): # GH17200 with tm.ensure_clean("tmp_items.json") as path: - with open(path, "w",encoding="utf-8") as infile: + with open(path, "w", encoding="utf-8") as infile: infile.write('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n') result = read_json(path, lines=True) expected = DataFrame([[1, 2], [1, 2]], columns=["a", "b"]) diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py index 14327a98ec47e..6be7269cb8433 100644 --- a/pandas/tests/io/parser/common/test_chunksize.py +++ b/pandas/tests/io/parser/common/test_chunksize.py @@ -228,7 +228,7 @@ def test_read_csv_memory_growth_chunksize(all_parsers): parser = all_parsers with tm.ensure_clean() as path: - with open(path, "w",encoding="utf-8") as f: + with open(path, "w", encoding="utf-8") as f: for i in range(1000): f.write(str(i) + "\n") diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index ba41b46f37099..1f3ee18541f4d 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -107,7 +107,7 @@ def test_no_permission(all_parsers): # verify that this process cannot open the file (not running as sudo) try: - with open(path,encoding="utf-8"): + with open(path, encoding="utf-8"): pass pytest.skip("Running as sudo.") except PermissionError: @@ -285,7 +285,7 @@ def test_file_handles_with_open(all_parsers, csv1): parser = all_parsers for mode in ["r", "rb"]: - with open(csv1, mode,encoding="utf-8") as f: + with open(csv1, mode, encoding="utf-8") as f: parser.read_csv(f) assert not f.closed @@ -392,7 +392,7 @@ def test_context_manageri_user_provided(all_parsers, datapath): # make sure that user-provided handles are not closed parser = all_parsers - with open(datapath("io", "data", "csv", "iris.csv"),encoding="utf-8") as path: + with open(datapath("io", "data", "csv", "iris.csv"), encoding="utf-8") as path: reader = parser.read_csv(path, chunksize=1) assert not reader.handles.handle.closed try: diff --git a/pandas/tests/io/parser/common/test_iterator.py b/pandas/tests/io/parser/common/test_iterator.py index f517c94998138..58e5886aedd6b 100644 --- a/pandas/tests/io/parser/common/test_iterator.py +++ b/pandas/tests/io/parser/common/test_iterator.py @@ -95,10 +95,10 @@ def test_iteration_open_handle(all_parsers): kwargs = {"header": None} with tm.ensure_clean() as path: - with open(path, "w",encoding="utf-8") as f: + with open(path, "w", encoding="utf-8") as f: f.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG") - with open(path,encoding="utf-8") as f: + with open(path, encoding="utf-8") as f: for line in f: if "CCC" in line: break diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py index d45f47a805019..425f5cfbcf392 100644 --- a/pandas/tests/io/parser/test_c_parser_only.py +++ b/pandas/tests/io/parser/test_c_parser_only.py @@ -603,7 +603,7 @@ def test_file_handles_mmap(c_parser_only, csv1): # Don't close user provided file handles. parser = c_parser_only - with open(csv1,encoding="utf-8") as f: + with open(csv1, encoding="utf-8") as f: with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as m: parser.read_csv(m) assert not m.closed @@ -615,7 +615,7 @@ def test_file_binary_mode(c_parser_only): expected = DataFrame([[1, 2, 3], [4, 5, 6]]) with tm.ensure_clean() as path: - with open(path, "w",encoding="utf-8") as f: + with open(path, "w", encoding="utf-8") as f: f.write("1,2,3\n4,5,6") with open(path, "rb") as f: @@ -627,7 +627,7 @@ def test_unix_style_breaks(c_parser_only): # GH 11020 parser = c_parser_only with tm.ensure_clean() as path: - with open(path, "w", newline="\n",encoding="utf-8") as f: + with open(path, "w", newline="\n", encoding="utf-8") as f: f.write("blah\n\ncol_1,col_2,col_3\n\n") result = parser.read_csv(path, skiprows=2, encoding="utf-8", engine="c") expected = DataFrame(columns=["col_1", "col_2", "col_3"]) diff --git a/pandas/tests/io/parser/test_compression.py b/pandas/tests/io/parser/test_compression.py index 7ab9237637f84..bcba9c4a1823d 100644 --- a/pandas/tests/io/parser/test_compression.py +++ b/pandas/tests/io/parser/test_compression.py @@ -129,7 +129,7 @@ def test_infer_compression(all_parsers, csv1, buffer, ext): kwargs["compression"] = "infer" if buffer: - with open(csv1,encoding="utf-8") as f: + with open(csv1, encoding="utf-8") as f: result = parser.read_csv(f, **kwargs) else: ext = "." + ext if ext else "" @@ -183,7 +183,9 @@ def test_ignore_compression_extension(all_parsers): with tm.ensure_clean("test.csv.zip") as path_zip: # make sure to create un-compressed file with zip extension df.to_csv(path_csv, index=False) - Path(path_zip).write_text(Path(path_csv).read_text(encoding="utf-8"),encoding="utf-8") + Path(path_zip).write_text( + Path(path_csv).read_text(encoding="utf-8"), encoding="utf-8" + ) tm.assert_frame_equal(parser.read_csv(path_zip, compression=None), df) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 4e273edfeec69..435b9bdade944 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -411,7 +411,7 @@ def test_constructor_bad_file(self, mmap_file): with pytest.raises(err, match=msg): icom._maybe_memory_map(non_file, True) - with open(mmap_file,encoding="utf-8") as target: + with open(mmap_file, encoding="utf-8") as target: pass msg = "I/O operation on closed file" @@ -419,7 +419,7 @@ def test_constructor_bad_file(self, mmap_file): icom._maybe_memory_map(target, True) def test_next(self, mmap_file): - with open(mmap_file,encoding="utf-8") as target: + with open(mmap_file, encoding="utf-8") as target: lines = target.readlines() with icom.get_handle( diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index da159566bd8be..eadf35aedd708 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -299,10 +299,10 @@ def test_ambiguous_archive_zip(): def test_ambiguous_archive_tar(tmp_path): csvAPath = tmp_path / "a.csv" - with open(csvAPath, "w",encoding="utf-8") as a: + with open(csvAPath, "w", encoding="utf-8") as a: a.write("foo,bar\n") csvBPath = tmp_path / "b.csv" - with open(csvBPath, "w",encoding="utf-8") as b: + with open(csvBPath, "w", encoding="utf-8") as b: b.write("foo,bar\n") tarpath = tmp_path / "archive.tar" diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index f1fea1732492c..18cc0f0b11dc9 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -193,7 +193,7 @@ class MockGCSFileSystem(AbstractFileSystem): def open(self, path, mode="r", *args): if "w" not in mode: raise FileNotFoundError - return open(os.path.join(tmpdir, "test.parquet"), mode,encoding="utf-8") + return open(os.path.join(tmpdir, "test.parquet"), mode, encoding="utf-8") monkeypatch.setattr("gcsfs.GCSFileSystem", MockGCSFileSystem) df1.to_parquet( diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index bf82c14a88e8e..256fb61412fc8 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -692,7 +692,7 @@ def try_remove_ws(x): @pytest.mark.slow def test_gold_canyon(self, banklist_data): gc = "Gold Canyon" - with open(banklist_data,encoding="utf-8") as f: + with open(banklist_data, encoding="utf-8") as f: raw_text = f.read() assert gc in raw_text diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index b58064745ee19..cb8ef8d2833d1 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -983,7 +983,7 @@ def test_unknown_parser(): def test_stylesheet_file_like(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") - with open(xsl, mode,encoding="utf-8") as f: + with open(xsl, mode, encoding="utf-8") as f: assert geom_df.to_xml(stylesheet=f) == xsl_expected @@ -995,7 +995,7 @@ def test_stylesheet_io(datapath, mode): # consider using --check-untyped-defs xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked] - with open(xsl_path, mode,encoding="utf-8") as f: + with open(xsl_path, mode, encoding="utf-8") as f: if mode == "rb": xsl_obj = BytesIO(f.read()) else: @@ -1010,7 +1010,7 @@ def test_stylesheet_io(datapath, mode): def test_stylesheet_buffered_reader(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") - with open(xsl, mode,encoding="utf-8") as f: + with open(xsl, mode, encoding="utf-8") as f: xsl_obj = f.read() output = geom_df.to_xml(stylesheet=xsl_obj) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 2b3afc16b1ecd..08caa3307e9cf 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -259,7 +259,7 @@ def parser(request): def read_xml_iterparse(data, **kwargs): with tm.ensure_clean() as path: - with open(path, "w",encoding="utf-8") as f: + with open(path, "w", encoding="utf-8") as f: f.write(data) return read_xml(path, **kwargs) @@ -267,7 +267,7 @@ def read_xml_iterparse(data, **kwargs): def read_xml_iterparse_comp(comp_path, compression_only, **kwargs): with get_handle(comp_path, "r", compression=compression_only) as handles: with tm.ensure_clean() as path: - with open(path, "w",encoding="utf-8") as f: + with open(path, "w", encoding="utf-8") as f: f.write(handles.handle.read()) return read_xml(path, **kwargs) @@ -351,7 +351,7 @@ def test_parser_consistency_url(parser): def test_file_like(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode,encoding="utf-8") as f: + with open(filename, mode, encoding="utf-8") as f: df_file = read_xml(f, parser=parser) df_expected = DataFrame( @@ -369,7 +369,7 @@ def test_file_like(datapath, parser, mode): def test_file_io(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode,encoding="utf-8") as f: + with open(filename, mode, encoding="utf-8") as f: xml_obj = f.read() df_io = read_xml( @@ -392,7 +392,7 @@ def test_file_io(datapath, parser, mode): def test_file_buffered_reader_string(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode,encoding="utf-8") as f: + with open(filename, mode, encoding="utf-8") as f: xml_obj = f.read() df_str = read_xml(xml_obj, parser=parser) @@ -412,7 +412,7 @@ def test_file_buffered_reader_string(datapath, parser, mode): def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode,encoding="utf-8") as f: + with open(filename, mode, encoding="utf-8") as f: next(f) xml_obj = f.read() @@ -1154,7 +1154,7 @@ def test_stylesheet_file_like(datapath, mode): kml = datapath("io", "data", "xml", "cta_rail_lines.kml") xsl = datapath("io", "data", "xml", "flatten_doc.xsl") - with open(xsl, mode,encoding="utf-8") as f: + with open(xsl, mode, encoding="utf-8") as f: df_style = read_xml( kml, xpath=".//k:Placemark", @@ -1174,7 +1174,7 @@ def test_stylesheet_io(datapath, mode): # consider using --check-untyped-defs xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked] - with open(xsl, mode,encoding="utf-8") as f: + with open(xsl, mode, encoding="utf-8") as f: if mode == "rb": xsl_obj = BytesIO(f.read()) else: @@ -1195,7 +1195,7 @@ def test_stylesheet_buffered_reader(datapath, mode): kml = datapath("io", "data", "xml", "cta_rail_lines.kml") xsl = datapath("io", "data", "xml", "flatten_doc.xsl") - with open(xsl, mode,encoding="utf-8") as f: + with open(xsl, mode, encoding="utf-8") as f: xsl_obj = f.read() df_style = read_xml( @@ -1355,7 +1355,7 @@ def test_stylesheet_file_close(datapath, mode): # consider using --check-untyped-defs xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked] - with open(xsl, mode,encoding="utf-8") as f: + with open(xsl, mode, encoding="utf-8") as f: if mode == "rb": xsl_obj = BytesIO(f.read()) else: @@ -1407,7 +1407,7 @@ def test_string_error(parser): def test_file_like_iterparse(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode,encoding="utf-8") as f: + with open(filename, mode, encoding="utf-8") as f: if mode == "r" and parser == "lxml": with pytest.raises( TypeError, match=("reading file objects must return bytes objects") @@ -1444,7 +1444,7 @@ def test_file_io_iterparse(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") funcIO = StringIO if mode == "r" else BytesIO - with open(filename, mode,encoding="utf-8") as f: + with open(filename, mode, encoding="utf-8") as f: with funcIO(f.read()) as b: if mode == "r" and parser == "lxml": with pytest.raises( @@ -1550,7 +1550,7 @@ def test_bad_xml(parser): """ with tm.ensure_clean(filename="bad.xml") as path: - with open(path, "w",encoding="utf-8") as f: + with open(path, "w", encoding="utf-8") as f: f.write(bad_xml) with pytest.raises( diff --git a/pandas/tests/io/xml/test_xml_dtypes.py b/pandas/tests/io/xml/test_xml_dtypes.py index ba563d17f277f..d62b9fa27e264 100644 --- a/pandas/tests/io/xml/test_xml_dtypes.py +++ b/pandas/tests/io/xml/test_xml_dtypes.py @@ -29,7 +29,7 @@ def iterparse(request): def read_xml_iterparse(data, **kwargs): with tm.ensure_clean() as path: - with open(path, "w",encoding="utf-8") as f: + with open(path, "w", encoding="utf-8") as f: f.write(data) return read_xml(path, **kwargs) diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py index d5384de4079a7..070ab872a4e5b 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -52,7 +52,7 @@ def test_from_csv(self, datetime_series, string_series): series_h = self.read_csv(path, header=0) assert series_h.name == "series" - with open(path, "w",encoding="utf-8") as outfile: + with open(path, "w", encoding="utf-8") as outfile: outfile.write("1998-01-01|1.0\n1999-01-01|2.0") series = self.read_csv(path, sep="|", parse_dates=True) @@ -69,7 +69,7 @@ def test_to_csv(self, datetime_series): with tm.ensure_clean() as path: datetime_series.to_csv(path, header=False) - with open(path, newline=None,encoding="utf-8") as f: + with open(path, newline=None, encoding="utf-8") as f: lines = f.readlines() assert lines[1] != "\n" diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py index 856aa0062e43d..72c9db23b2108 100644 --- a/pandas/tests/util/test_show_versions.py +++ b/pandas/tests/util/test_show_versions.py @@ -16,7 +16,7 @@ def test_show_versions(tmpdir): pd.show_versions(as_json=as_json) - with open(as_json,encoding="utf-8") as fd: + with open(as_json, encoding="utf-8") as fd: # check if file output is valid JSON, will raise an exception if not result = json.load(fd) @@ -75,7 +75,7 @@ def test_json_output_match(capsys, tmpdir): out_path = os.path.join(tmpdir, "test_json.json") pd.show_versions(as_json=out_path) - with open(out_path,encoding="utf-8") as out_fd: + with open(out_path, encoding="utf-8") as out_fd: result_file = out_fd.read() assert result_console == result_file diff --git a/scripts/generate_pxi.py b/scripts/generate_pxi.py index 586b2d4fe3e35..47648a3937b4c 100644 --- a/scripts/generate_pxi.py +++ b/scripts/generate_pxi.py @@ -5,11 +5,11 @@ def process_tempita(pxifile, outfile): - with open(pxifile,encoding="utf-8") as f: + with open(pxifile, encoding="utf-8") as f: tmpl = f.read() pyxcontent = Tempita.sub(tmpl) - with open(outfile, "w",encoding="utf-8") as f: + with open(outfile, "w", encoding="utf-8") as f: f.write(pyxcontent) diff --git a/scripts/generate_version.py b/scripts/generate_version.py index 3b778567e2335..8a93e4c1df55e 100644 --- a/scripts/generate_version.py +++ b/scripts/generate_version.py @@ -8,7 +8,7 @@ def write_version_info(path): if os.environ.get("MESON_DIST_ROOT"): # raise ValueError("dist root is", os.environ.get("MESON_DIST_ROOT")) path = os.path.join(os.environ.get("MESON_DIST_ROOT"), path) - with open(path, "w",encoding="utf-8") as file: + with open(path, "w", encoding="utf-8") as file: file.write(f'__version__="{versioneer.get_version()}"\n') file.write( f'__git_version__="{versioneer.get_versions()["full-revisionid"]}"\n' diff --git a/scripts/pandas_errors_documented.py b/scripts/pandas_errors_documented.py index f23d9bc979334..116a63b33eaf0 100644 --- a/scripts/pandas_errors_documented.py +++ b/scripts/pandas_errors_documented.py @@ -34,7 +34,7 @@ def main(argv: Sequence[str] | None = None) -> None: args = parser.parse_args(argv) with open(args.path, encoding="utf-8") as f: file_errors = get_defined_errors(f.read()) - with open(API_PATH,encoding="utf-8") as f: + with open(API_PATH, encoding="utf-8") as f: doc_errors = { line.split(".")[1].strip() for line in f.readlines() if "errors" in line } diff --git a/scripts/sort_whatsnew_note.py b/scripts/sort_whatsnew_note.py index b7a427329a2df..531ea57244b23 100644 --- a/scripts/sort_whatsnew_note.py +++ b/scripts/sort_whatsnew_note.py @@ -63,12 +63,12 @@ def main(argv: Sequence[str] | None = None) -> int: args = parser.parse_args(argv) ret = 0 for path in args.paths: - with open(path,encoding="utf-8") as fd: + with open(path, encoding="utf-8") as fd: content = fd.read() new_content = sort_whatsnew_note(content) if content != new_content: ret |= 1 - with open(path, "w",encoding="utf-8") as fd: + with open(path, "w", encoding="utf-8") as fd: fd.write(new_content) return ret diff --git a/scripts/tests/test_validate_min_versions_in_sync.py b/scripts/tests/test_validate_min_versions_in_sync.py index d454bf7063c3c..ac33f8dcbffaf 100644 --- a/scripts/tests/test_validate_min_versions_in_sync.py +++ b/scripts/tests/test_validate_min_versions_in_sync.py @@ -49,13 +49,13 @@ def test_pin_min_versions_to_yaml_file(src_toml, src_yaml, expected_yaml): with open(src_toml, "rb") as toml_f: toml_map = tomllib.load(toml_f) - with open(src_yaml,encoding="utf-8") as yaml_f: + with open(src_yaml, encoding="utf-8") as yaml_f: yaml_file_data = yaml_f.read() yaml_file = yaml.safe_load(yaml_file_data) yaml_dependencies = yaml_file["dependencies"] yaml_map = get_yaml_map_from(yaml_dependencies) toml_map = get_toml_map_from(toml_map) result_yaml_file = pin_min_versions_to_yaml_file(yaml_map, toml_map, yaml_file_data) - with open(expected_yaml,encoding="utf-8") as yaml_f: + with open(expected_yaml, encoding="utf-8") as yaml_f: dummy_yaml_expected_file_1 = yaml_f.read() assert result_yaml_file == dummy_yaml_expected_file_1 diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 6b43506bd03f1..c9eb476ab65fa 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -367,7 +367,7 @@ def get_all_api_items(): base_path = pathlib.Path(__file__).parent.parent api_doc_fnames = pathlib.Path(base_path, "doc", "source", "reference") for api_doc_fname in api_doc_fnames.glob("*.rst"): - with open(api_doc_fname,encoding="utf-8") as f: + with open(api_doc_fname, encoding="utf-8") as f: yield from get_api_items(f) diff --git a/scripts/validate_exception_location.py b/scripts/validate_exception_location.py index 82154e82786b9..5f77e4c78db82 100644 --- a/scripts/validate_exception_location.py +++ b/scripts/validate_exception_location.py @@ -36,7 +36,7 @@ def get_warnings_and_exceptions_from_api_path() -> set[str]: - with open(API_PATH,encoding="utf-8") as f: + with open(API_PATH, encoding="utf-8") as f: doc_errors = { line.split(".")[1].strip() for line in f.readlines() if "errors" in line } diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py index f4d51142876b9..cb03276d2dd93 100755 --- a/scripts/validate_min_versions_in_sync.py +++ b/scripts/validate_min_versions_in_sync.py @@ -63,7 +63,7 @@ def pin_min_versions_to_ci_deps() -> int: toml_dependencies = tomllib.load(toml_f) ret = 0 for curr_file in all_yaml_files: - with open(curr_file,encoding="utf-8") as yaml_f: + with open(curr_file, encoding="utf-8") as yaml_f: yaml_start_data = yaml_f.read() yaml_file = yaml.safe_load(yaml_start_data) yaml_dependencies = yaml_file["dependencies"] @@ -73,7 +73,7 @@ def pin_min_versions_to_ci_deps() -> int: yaml_map, toml_map, yaml_start_data ) if yaml_result_data != yaml_start_data: - with open(curr_file, "w",encoding="utf-8") as f: + with open(curr_file, "w", encoding="utf-8") as f: f.write(yaml_result_data) ret |= 1 return ret diff --git a/setup.py b/setup.py index 8fcd4804cf6e6..52739a97bec2a 100755 --- a/setup.py +++ b/setup.py @@ -88,11 +88,11 @@ def render_templates(cls, pxifiles): # if .pxi.in is not updated, no need to output .pxi continue - with open(pxifile,encoding="utf-8") as f: + with open(pxifile, encoding="utf-8") as f: tmpl = f.read() pyxcontent = Tempita.sub(tmpl) - with open(outfile, "w",encoding="utf-8") as f: + with open(outfile, "w", encoding="utf-8") as f: f.write(pyxcontent) def build_extensions(self): diff --git a/web/pandas_web.py b/web/pandas_web.py index 75dde11c25138..9191cde31c20f 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -110,7 +110,7 @@ def blog_add_posts(context): md = markdown.Markdown( extensions=context["main"]["markdown_extensions"] ) - with open(os.path.join(posts_path, fname),encoding="utf-8") as f: + with open(os.path.join(posts_path, fname), encoding="utf-8") as f: html = md.convert(f.read()) title = md.Meta["title"][0] summary = re.sub(tag_expr, "", html) @@ -197,7 +197,11 @@ def maintainers_add_info(context): # save the data fetched from github to use it in case we exceed # git github api quota in the future - with open(pathlib.Path(context["target_path"]) / "maintainers.json", "w",encoding="utf-8") as f: + with open( + pathlib.Path(context["target_path"]) / "maintainers.json", + "w", + encoding="utf-8", + ) as f: json.dump(maintainers_info, f) return context @@ -220,7 +224,11 @@ def home_add_releases(context): resp.raise_for_status() releases = resp.json() - with open(pathlib.Path(context["target_path"]) / "releases.json", "w",encoding="utf-8") as f: + with open( + pathlib.Path(context["target_path"]) / "releases.json", + "w", + encoding="utf-8", + ) as f: json.dump(releases, f, default=datetime.datetime.isoformat) for release in releases: @@ -304,7 +312,9 @@ def roadmap_pdeps(context): resp.raise_for_status() pdeps = resp.json() - with open(pathlib.Path(context["target_path"]) / "pdeps.json", "w",encoding="utf-8") as f: + with open( + pathlib.Path(context["target_path"]) / "pdeps.json", "w", encoding="utf-8" + ) as f: json.dump(pdeps, f) for pdep in sorted(pdeps["items"], key=operator.itemgetter("title")): @@ -346,7 +356,7 @@ def get_context(config_fname: str, **kwargs): Load the config yaml as the base context, and enrich it with the information added by the context preprocessors defined in the file. """ - with open(config_fname,encoding="utf-8") as f: + with open(config_fname, encoding="utf-8") as f: context = yaml.safe_load(f) context["source_path"] = os.path.dirname(config_fname) @@ -418,7 +428,7 @@ def main( extension = os.path.splitext(fname)[-1] if extension in (".html", ".md"): - with open(os.path.join(source_path, fname),encoding="utf-8") as f: + with open(os.path.join(source_path, fname), encoding="utf-8") as f: content = f.read() if extension == ".md": body = markdown.markdown( @@ -431,7 +441,9 @@ def main( context["base_url"] = "".join(["../"] * os.path.normpath(fname).count("/")) content = jinja_env.from_string(content).render(**context) fname_html = os.path.splitext(fname)[0] + ".html" - with open(os.path.join(target_path, fname_html), "w",encoding="utf-8") as f: + with open( + os.path.join(target_path, fname_html), "w", encoding="utf-8" + ) as f: f.write(content) else: shutil.copy( From 8890943c683467d4a875cdf4b1e1118a90a03301 Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sat, 29 Apr 2023 19:17:42 +0530 Subject: [PATCH 07/17] Some Changed reflected --- pandas/_testing/contexts.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index f11041d477701..f09c59e09fbb2 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -126,7 +126,11 @@ def ensure_clean( handle_or_str: str | IO = str(path) if return_filelike: kwargs.setdefault("mode", "w+b") - handle_or_str = open(path, **kwargs, encoding="utf-8") + handle_or_str = open( + path, + encoding=kwargs.get("encoding", None), + **{key: value for key, value in kwargs.items() if key != "encoding"}, + ) try: yield handle_or_str From dcbe4dcc9a81afc2594c675e9e7da339899b6054 Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sun, 30 Apr 2023 15:51:14 +0530 Subject: [PATCH 08/17] test_xml Updated --- pandas/tests/io/xml/test_xml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 08caa3307e9cf..bf8da72adb732 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -1444,7 +1444,7 @@ def test_file_io_iterparse(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") funcIO = StringIO if mode == "r" else BytesIO - with open(filename, mode, encoding="utf-8") as f: + with open(filename, mode, encoding="utf-8" if mode=="r" else None) as f: with funcIO(f.read()) as b: if mode == "r" and parser == "lxml": with pytest.raises( From b4d107b44b972d039188b1a5a738753c433a3eeb Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sun, 30 Apr 2023 16:03:08 +0530 Subject: [PATCH 09/17] Pre-commit check passed --- pandas/tests/io/xml/test_xml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index bf8da72adb732..df369546a4a45 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -1444,7 +1444,7 @@ def test_file_io_iterparse(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") funcIO = StringIO if mode == "r" else BytesIO - with open(filename, mode, encoding="utf-8" if mode=="r" else None) as f: + with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f: with funcIO(f.read()) as b: if mode == "r" and parser == "lxml": with pytest.raises( From 959dadcd34f7aecb73338361e69159b78287d1db Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sun, 30 Apr 2023 18:01:22 +0530 Subject: [PATCH 10/17] Mode changed in xml file --- pandas/tests/io/xml/test_xml.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index df369546a4a45..c28f8fd788787 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -1444,7 +1444,11 @@ def test_file_io_iterparse(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") funcIO = StringIO if mode == "r" else BytesIO - with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f: + with open( + filename, + mode="utf-8" if mode == "r" else None, + encoding="utf-8" if mode == "r" else None, + ) as f: with funcIO(f.read()) as b: if mode == "r" and parser == "lxml": with pytest.raises( From 7aebe0519d6d919e52c2147ea85eb73458c54f87 Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sun, 30 Apr 2023 18:36:45 +0530 Subject: [PATCH 11/17] mode reverted --- pandas/tests/io/xml/test_xml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index c28f8fd788787..5d101fc2ceb59 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -1446,7 +1446,7 @@ def test_file_io_iterparse(datapath, parser, mode): funcIO = StringIO if mode == "r" else BytesIO with open( filename, - mode="utf-8" if mode == "r" else None, + mode, encoding="utf-8" if mode == "r" else None, ) as f: with funcIO(f.read()) as b: From c5fb9d6db85595fabf973e8fc4a77e6ae7f573d2 Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Fri, 5 May 2023 19:03:19 +0530 Subject: [PATCH 12/17] Try to fix errors --- .../io/parser/common/test_file_buffer_url.py | 11 ++++++--- pandas/tests/io/xml/test_to_xml.py | 24 ++++++++++++------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index 1f3ee18541f4d..7884e67044e9e 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -285,9 +285,14 @@ def test_file_handles_with_open(all_parsers, csv1): parser = all_parsers for mode in ["r", "rb"]: - with open(csv1, mode, encoding="utf-8") as f: - parser.read_csv(f) - assert not f.closed + if mode == "rb": + with open(csv1, mode) as f: + parser.read_csv(f) + assert not f.closed + else: + with open(csv1, mode, encoding="utf-8") as f: + parser.read_csv(f) + assert not f.closed def test_invalid_file_buffer_class(all_parsers): diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index cb8ef8d2833d1..b080da206ce24 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -983,9 +983,12 @@ def test_unknown_parser(): def test_stylesheet_file_like(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") - with open(xsl, mode, encoding="utf-8") as f: - assert geom_df.to_xml(stylesheet=f) == xsl_expected - + if mode == "rb": + with open(xsl, mode) as f: + assert geom_df.to_xml(stylesheet=f) == xsl_expected + else: + with open(xsl, mode, encoding="utf-8") as f: + assert geom_df.to_xml(stylesheet=f) == xsl_expected @td.skip_if_no("lxml") def test_stylesheet_io(datapath, mode): @@ -995,10 +998,11 @@ def test_stylesheet_io(datapath, mode): # consider using --check-untyped-defs xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked] - with open(xsl_path, mode, encoding="utf-8") as f: - if mode == "rb": + if mode == "rb": + with open(xsl_path, "rb") as f: xsl_obj = BytesIO(f.read()) - else: + else: + with open(xsl_path, "r", encoding="utf-8") as f: xsl_obj = StringIO(f.read()) output = geom_df.to_xml(stylesheet=xsl_obj) @@ -1010,8 +1014,12 @@ def test_stylesheet_io(datapath, mode): def test_stylesheet_buffered_reader(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") - with open(xsl, mode, encoding="utf-8") as f: - xsl_obj = f.read() + if mode == "rb": + with open(xsl, mode) as f: + xsl_obj = f.read() + else: + with open(xsl, mode, encoding="utf-8") as f: + xsl_obj = f.read() output = geom_df.to_xml(stylesheet=xsl_obj) From d250a1d148f326be2df17f0e4e19e29a7b4b82b6 Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sat, 6 May 2023 00:47:24 +0530 Subject: [PATCH 13/17] error-checks --- pandas/tests/io/parser/common/test_file_buffer_url.py | 2 +- pandas/tests/io/xml/test_to_xml.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index 7884e67044e9e..3bbd9c422dac8 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -286,7 +286,7 @@ def test_file_handles_with_open(all_parsers, csv1): for mode in ["r", "rb"]: if mode == "rb": - with open(csv1, mode) as f: + with open(csv1, mode, encoding="utf-8") as f: parser.read_csv(f) assert not f.closed else: diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index b080da206ce24..59534bd421199 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -984,12 +984,13 @@ def test_stylesheet_file_like(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") if mode == "rb": - with open(xsl, mode) as f: + with open(xsl, mode, encoding="utf-8") as f: assert geom_df.to_xml(stylesheet=f) == xsl_expected else: with open(xsl, mode, encoding="utf-8") as f: assert geom_df.to_xml(stylesheet=f) == xsl_expected + @td.skip_if_no("lxml") def test_stylesheet_io(datapath, mode): xsl_path = datapath("io", "data", "xml", "row_field_output.xsl") @@ -1002,7 +1003,7 @@ def test_stylesheet_io(datapath, mode): with open(xsl_path, "rb") as f: xsl_obj = BytesIO(f.read()) else: - with open(xsl_path, "r", encoding="utf-8") as f: + with open(xsl_path, encoding="utf-8") as f: xsl_obj = StringIO(f.read()) output = geom_df.to_xml(stylesheet=xsl_obj) @@ -1015,7 +1016,7 @@ def test_stylesheet_buffered_reader(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") if mode == "rb": - with open(xsl, mode) as f: + with open(xsl, mode, encoding="utf-8") as f: xsl_obj = f.read() else: with open(xsl, mode, encoding="utf-8") as f: From 640e8e0c41a97200db88e35c7b09df5909a43309 Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sat, 6 May 2023 20:41:54 +0530 Subject: [PATCH 14/17] Fix Some errors --- .../io/parser/common/test_file_buffer_url.py | 2 +- pandas/tests/io/xml/test_to_xml.py | 4 ++-- pandas/tests/io/xml/test_xml.py | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index 3bbd9c422dac8..7884e67044e9e 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -286,7 +286,7 @@ def test_file_handles_with_open(all_parsers, csv1): for mode in ["r", "rb"]: if mode == "rb": - with open(csv1, mode, encoding="utf-8") as f: + with open(csv1, mode) as f: parser.read_csv(f) assert not f.closed else: diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index 59534bd421199..1e6339e5c75d3 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -984,7 +984,7 @@ def test_stylesheet_file_like(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") if mode == "rb": - with open(xsl, mode, encoding="utf-8") as f: + with open(xsl, mode) as f: assert geom_df.to_xml(stylesheet=f) == xsl_expected else: with open(xsl, mode, encoding="utf-8") as f: @@ -1016,7 +1016,7 @@ def test_stylesheet_buffered_reader(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") if mode == "rb": - with open(xsl, mode, encoding="utf-8") as f: + with open(xsl, mode) as f: xsl_obj = f.read() else: with open(xsl, mode, encoding="utf-8") as f: diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 28459ee86712d..49b8f8956de5b 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -351,7 +351,7 @@ def test_parser_consistency_url(parser): def test_file_like(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode, encoding="utf-8") as f: + with open(filename, mode) as f: df_file = read_xml(f, parser=parser) df_expected = DataFrame( @@ -369,7 +369,7 @@ def test_file_like(datapath, parser, mode): def test_file_io(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode, encoding="utf-8") as f: + with open(filename, mode) as f: xml_obj = f.read() df_io = read_xml( @@ -392,7 +392,7 @@ def test_file_io(datapath, parser, mode): def test_file_buffered_reader_string(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode, encoding="utf-8") as f: + with open(filename, mode) as f: xml_obj = f.read() df_str = read_xml(xml_obj, parser=parser) @@ -412,7 +412,7 @@ def test_file_buffered_reader_string(datapath, parser, mode): def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode, encoding="utf-8") as f: + with open(filename, mode) as f: next(f) xml_obj = f.read() @@ -1163,7 +1163,7 @@ def test_stylesheet_file_like(datapath, mode): kml = datapath("io", "data", "xml", "cta_rail_lines.kml") xsl = datapath("io", "data", "xml", "flatten_doc.xsl") - with open(xsl, mode, encoding="utf-8") as f: + with open(xsl, mode) as f: df_style = read_xml( kml, xpath=".//k:Placemark", @@ -1183,7 +1183,7 @@ def test_stylesheet_io(datapath, mode): # consider using --check-untyped-defs xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked] - with open(xsl, mode, encoding="utf-8") as f: + with open(xsl, mode) as f: if mode == "rb": xsl_obj = BytesIO(f.read()) else: @@ -1204,7 +1204,7 @@ def test_stylesheet_buffered_reader(datapath, mode): kml = datapath("io", "data", "xml", "cta_rail_lines.kml") xsl = datapath("io", "data", "xml", "flatten_doc.xsl") - with open(xsl, mode, encoding="utf-8") as f: + with open(xsl, mode) as f: xsl_obj = f.read() df_style = read_xml( @@ -1364,7 +1364,7 @@ def test_stylesheet_file_close(datapath, mode): # consider using --check-untyped-defs xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked] - with open(xsl, mode, encoding="utf-8") as f: + with open(xsl, mode) as f: if mode == "rb": xsl_obj = BytesIO(f.read()) else: @@ -1416,7 +1416,7 @@ def test_string_error(parser): def test_file_like_iterparse(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode, encoding="utf-8") as f: + with open(filename, mode) as f: if mode == "r" and parser == "lxml": with pytest.raises( TypeError, match=("reading file objects must return bytes objects") From 98c004c3d34c8677f7b53f6b480e9f7c20ddc9b9 Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sat, 6 May 2023 21:47:15 +0530 Subject: [PATCH 15/17] Unspecified-encodingFixed --- .../io/parser/common/test_file_buffer_url.py | 2 +- pandas/tests/io/xml/test_to_xml.py | 4 ++-- pandas/tests/io/xml/test_xml.py | 18 +++++++++--------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index 7884e67044e9e..aaba42018a439 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -286,7 +286,7 @@ def test_file_handles_with_open(all_parsers, csv1): for mode in ["r", "rb"]: if mode == "rb": - with open(csv1, mode) as f: + with open(csv1, mode, encoding="utf-8" if mode == "r" else None) as f: parser.read_csv(f) assert not f.closed else: diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index 1e6339e5c75d3..057734595311e 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -984,7 +984,7 @@ def test_stylesheet_file_like(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") if mode == "rb": - with open(xsl, mode) as f: + with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f: assert geom_df.to_xml(stylesheet=f) == xsl_expected else: with open(xsl, mode, encoding="utf-8") as f: @@ -1016,7 +1016,7 @@ def test_stylesheet_buffered_reader(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") if mode == "rb": - with open(xsl, mode) as f: + with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f: xsl_obj = f.read() else: with open(xsl, mode, encoding="utf-8") as f: diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index 49b8f8956de5b..04abebe4a0a71 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -351,7 +351,7 @@ def test_parser_consistency_url(parser): def test_file_like(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode) as f: + with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f: df_file = read_xml(f, parser=parser) df_expected = DataFrame( @@ -369,7 +369,7 @@ def test_file_like(datapath, parser, mode): def test_file_io(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode) as f: + with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f: xml_obj = f.read() df_io = read_xml( @@ -392,7 +392,7 @@ def test_file_io(datapath, parser, mode): def test_file_buffered_reader_string(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode) as f: + with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f: xml_obj = f.read() df_str = read_xml(xml_obj, parser=parser) @@ -412,7 +412,7 @@ def test_file_buffered_reader_string(datapath, parser, mode): def test_file_buffered_reader_no_xml_declaration(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode) as f: + with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f: next(f) xml_obj = f.read() @@ -1163,7 +1163,7 @@ def test_stylesheet_file_like(datapath, mode): kml = datapath("io", "data", "xml", "cta_rail_lines.kml") xsl = datapath("io", "data", "xml", "flatten_doc.xsl") - with open(xsl, mode) as f: + with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f: df_style = read_xml( kml, xpath=".//k:Placemark", @@ -1183,7 +1183,7 @@ def test_stylesheet_io(datapath, mode): # consider using --check-untyped-defs xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked] - with open(xsl, mode) as f: + with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f: if mode == "rb": xsl_obj = BytesIO(f.read()) else: @@ -1204,7 +1204,7 @@ def test_stylesheet_buffered_reader(datapath, mode): kml = datapath("io", "data", "xml", "cta_rail_lines.kml") xsl = datapath("io", "data", "xml", "flatten_doc.xsl") - with open(xsl, mode) as f: + with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f: xsl_obj = f.read() df_style = read_xml( @@ -1364,7 +1364,7 @@ def test_stylesheet_file_close(datapath, mode): # consider using --check-untyped-defs xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked] - with open(xsl, mode) as f: + with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f: if mode == "rb": xsl_obj = BytesIO(f.read()) else: @@ -1416,7 +1416,7 @@ def test_string_error(parser): def test_file_like_iterparse(datapath, parser, mode): filename = datapath("io", "data", "xml", "books.xml") - with open(filename, mode) as f: + with open(filename, mode, encoding="utf-8" if mode == "r" else None) as f: if mode == "r" and parser == "lxml": with pytest.raises( TypeError, match=("reading file objects must return bytes objects") From 513aebd26b35f7e96100af0e2ddd429deebfb3d9 Mon Sep 17 00:00:00 2001 From: Ketu Patel Date: Sun, 7 May 2023 10:22:22 +0530 Subject: [PATCH 16/17] final commited --- .../io/parser/common/test_file_buffer_url.py | 11 +++------ pandas/tests/io/xml/test_to_xml.py | 23 ++++++------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/pandas/tests/io/parser/common/test_file_buffer_url.py b/pandas/tests/io/parser/common/test_file_buffer_url.py index aaba42018a439..ba196a532adf6 100644 --- a/pandas/tests/io/parser/common/test_file_buffer_url.py +++ b/pandas/tests/io/parser/common/test_file_buffer_url.py @@ -285,14 +285,9 @@ def test_file_handles_with_open(all_parsers, csv1): parser = all_parsers for mode in ["r", "rb"]: - if mode == "rb": - with open(csv1, mode, encoding="utf-8" if mode == "r" else None) as f: - parser.read_csv(f) - assert not f.closed - else: - with open(csv1, mode, encoding="utf-8") as f: - parser.read_csv(f) - assert not f.closed + with open(csv1, mode, encoding="utf-8" if mode == "r" else None) as f: + parser.read_csv(f) + assert not f.closed def test_invalid_file_buffer_class(all_parsers): diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index 057734595311e..1f1f44f408fc1 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -983,12 +983,8 @@ def test_unknown_parser(): def test_stylesheet_file_like(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") - if mode == "rb": - with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f: - assert geom_df.to_xml(stylesheet=f) == xsl_expected - else: - with open(xsl, mode, encoding="utf-8") as f: - assert geom_df.to_xml(stylesheet=f) == xsl_expected + with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f: + assert geom_df.to_xml(stylesheet=f) == xsl_expected @td.skip_if_no("lxml") @@ -999,11 +995,10 @@ def test_stylesheet_io(datapath, mode): # consider using --check-untyped-defs xsl_obj: BytesIO | StringIO # type: ignore[annotation-unchecked] - if mode == "rb": - with open(xsl_path, "rb") as f: + with open(xsl_path, mode, encoding="utf-8" if mode == "r" else None) as f: + if mode == "rb": xsl_obj = BytesIO(f.read()) - else: - with open(xsl_path, encoding="utf-8") as f: + else: xsl_obj = StringIO(f.read()) output = geom_df.to_xml(stylesheet=xsl_obj) @@ -1015,12 +1010,8 @@ def test_stylesheet_io(datapath, mode): def test_stylesheet_buffered_reader(datapath, mode): xsl = datapath("io", "data", "xml", "row_field_output.xsl") - if mode == "rb": - with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f: - xsl_obj = f.read() - else: - with open(xsl, mode, encoding="utf-8") as f: - xsl_obj = f.read() + with open(xsl, mode, encoding="utf-8" if mode == "r" else None) as f: + xsl_obj = f.read() output = geom_df.to_xml(stylesheet=xsl_obj) From 91287692679cd80dde5bd73669ce02ed8e47d703 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Sun, 7 May 2023 09:04:00 +0100 Subject: [PATCH 17/17] simplify --- pandas/_testing/contexts.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index f09c59e09fbb2..ba2c8c219dc41 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -124,13 +124,10 @@ def ensure_clean( path.touch() handle_or_str: str | IO = str(path) + encoding = kwargs.pop("encoding", None) if return_filelike: kwargs.setdefault("mode", "w+b") - handle_or_str = open( - path, - encoding=kwargs.get("encoding", None), - **{key: value for key, value in kwargs.items() if key != "encoding"}, - ) + handle_or_str = open(path, encoding=encoding, **kwargs) try: yield handle_or_str