From 1f698b95e770a741df748a227ffc3ce7770c88bf Mon Sep 17 00:00:00 2001 From: ErdiTk <24619655+ErdiTk@users.noreply.github.com> Date: Fri, 3 Mar 2023 22:58:19 +0100 Subject: [PATCH 1/5] Removed "PLW2901" and added in main folders where needs to be enabled gradually. Currently removed 10 errors generated outside the main folders. --- .pre-commit-config.yaml | 7 ++ doc/source/conf.py | 12 +-- doc/source/whatsnew/v2.1.0.rst | 9 ++- pandas/_config/config.py | 4 +- pandas/_libs/algos.pyx | 2 +- pandas/_libs/index.pyx | 10 +-- pandas/core/base.py | 2 +- pandas/core/groupby/generic.py | 17 +++++ pandas/core/groupby/ops.py | 5 ++ pandas/core/internals/construction.py | 5 +- pandas/core/series.py | 7 ++ pandas/io/orc.py | 22 ++++++ .../apply/test_frame_apply_relabeling.py | 8 ++ pandas/tests/groupby/test_groupby.py | 10 ++- pandas/util/version/__init__.py | 10 +-- pyproject.toml | 9 +-- scripts/no_bool_in_generic.py | 4 +- scripts/sort_whatsnew_note.py | 76 +++++++++++++++++++ scripts/tests/test_sort_whatsnew_note.py | 30 ++++++++ scripts/validate_docstrings.py | 22 +++--- scripts/validate_min_versions_in_sync.py | 8 +- scripts/validate_rst_title_capitalization.py | 8 +- setup.py | 4 +- web/pandas_web.py | 8 +- 24 files changed, 237 insertions(+), 62 deletions(-) create mode 100644 scripts/sort_whatsnew_note.py create mode 100644 scripts/tests/test_sort_whatsnew_note.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e0468aa8137a2..484107af678a7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -436,3 +436,10 @@ repos: types: [python] files: ^pandas/tests language: python + - id: sort-whatsnew-items + name: sort whatsnew entries by issue number + entry: python -m scripts.sort_whatsnew_note + types: [rst] + language: python + files: ^doc/source/whatsnew/v + exclude: ^doc/source/whatsnew/v(0|1|2\.0\.0) diff --git a/doc/source/conf.py b/doc/source/conf.py index 6671cefae9073..72b1e7f775d8f 100644 --- a/doc/source/conf.py +++ 
b/doc/source/conf.py @@ -101,20 +101,20 @@ reldir = os.path.relpath(dirname, source_path) for fname in fnames: if os.path.splitext(fname)[-1] in (".rst", ".ipynb"): - fname = os.path.relpath(os.path.join(dirname, fname), source_path) + fname_new = os.path.relpath(os.path.join(dirname, fname), source_path) - if fname == "index.rst" and os.path.abspath(dirname) == source_path: + if fname_new == "index.rst" and os.path.abspath(dirname) == source_path: continue if pattern == "-api" and reldir.startswith("reference"): - exclude_patterns.append(fname) + exclude_patterns.append(fname_new) elif ( pattern == "whatsnew" and not reldir.startswith("reference") and reldir != "whatsnew" ): - exclude_patterns.append(fname) - elif single_doc and fname != pattern: - exclude_patterns.append(fname) + exclude_patterns.append(fname_new) + elif single_doc and fname_new != pattern: + exclude_patterns.append(fname_new) with open(os.path.join(source_path, "index.rst.template")) as f: t = jinja2.Template(f.read()) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index dd27fd9e128ae..c0ca5b2320338 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -92,6 +92,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ +- Deprecated pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`) - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`) - @@ -100,12 +101,12 @@ Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Performance improvement in :meth:`DataFrame.where` when ``cond`` is backed by an extension dtype (:issue:`51574`) -- Performance improvement in :meth:`~arrays.ArrowExtensionArray.isna` when array has zero nulls or is all nulls (:issue:`51630`) -- Performance improvement in 
:meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` for extension array dtypes (:issue:`51549`) -- Performance improvement in :meth:`DataFrame.clip` and :meth:`Series.clip` (:issue:`51472`) - Performance improvement in :func:`read_parquet` on string columns when using ``use_nullable_dtypes=True`` (:issue:`47345`) +- Performance improvement in :meth:`DataFrame.clip` and :meth:`Series.clip` (:issue:`51472`) +- Performance improvement in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` for extension array dtypes (:issue:`51549`) +- Performance improvement in :meth:`DataFrame.where` when ``cond`` is backed by an extension dtype (:issue:`51574`) - Performance improvement in :meth:`read_orc` when reading a remote URI file path. (:issue:`51609`) +- Performance improvement in :meth:`~arrays.ArrowExtensionArray.isna` when array has zero nulls or is all nulls (:issue:`51630`) .. --------------------------------------------------------------------------- .. 
_whatsnew_210.bug_fixes: diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 0149ea545a4c5..2dfabb31aa2dd 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -158,8 +158,8 @@ def _set_option(*args, **kwargs) -> None: o.validator(v) # walk the nested dict - root, k = _get_root(key) - root[k] = v + root, k_n = _get_root(key) + root[k_n] = v if o.cb: if silent: diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 1f701a871abe9..adb920e0cca6d 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -752,7 +752,7 @@ def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike): tuple is_monotonic_inc : bool is_monotonic_dec : bool - is_unique : bool + is_strict_monotonic : bool """ cdef: Py_ssize_t i, n diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 1b42ad1c0fda7..1050d15439ebf 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -256,24 +256,24 @@ cdef class IndexEngine: cdef _do_monotonic_check(self): cdef: - bint is_unique + bint is_strict_monotonic if self.mask is not None and np.any(self.mask): self.monotonic_inc = 0 self.monotonic_dec = 0 else: try: values = self.values - self.monotonic_inc, self.monotonic_dec, is_unique = \ + self.monotonic_inc, self.monotonic_dec, is_strict_monotonic = \ self._call_monotonic(values) except TypeError: self.monotonic_inc = 0 self.monotonic_dec = 0 - is_unique = 0 + is_strict_monotonic = 0 self.need_monotonic_check = 0 - # we can only be sure of uniqueness if is_unique=1 - if is_unique: + # we can only be sure of uniqueness if is_strict_monotonic=1 + if is_strict_monotonic: self.unique = 1 self.need_unique_check = 0 diff --git a/pandas/core/base.py b/pandas/core/base.py index 9f02e20b1605d..d9b2647d19f93 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -361,7 +361,7 @@ def item(self): Returns ------- scalar - The first element of %(klass)s. + The first element of Series. 
Raises ------ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 60c43b6cf0ecd..dca2d5676f71d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -23,6 +23,7 @@ Union, cast, ) +import warnings import numpy as np @@ -49,6 +50,7 @@ Substitution, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( ensure_int64, @@ -270,6 +272,16 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) # pinned in _python_agg_general, only in _aggregate_named result = self._aggregate_named(func, *args, **kwargs) + warnings.warn( + "Pinning the groupby key to each group in " + f"{type(self).__name__}.agg is deprecated, and cases that " + "relied on it will raise in a future version. " + "If your operation requires utilizing the groupby keys, " + "iterate over the groupby object instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # result is a dict whose keys are the elements of result_index result = Series(result, index=self.grouper.result_index) result = self._wrap_aggregated_output(result) @@ -407,6 +419,7 @@ def _aggregate_named(self, func, *args, **kwargs): for name, group in self.grouper.get_iterator( self._selected_obj, axis=self.axis ): + # needed for pandas/tests/groupby/test_groupby.py::test_basic_aggregations object.__setattr__(group, "name", name) output = func(group, *args, **kwargs) @@ -1537,6 +1550,7 @@ def _transform_general(self, func, *args, **kwargs): except StopIteration: pass else: + # 2023-02-27 No tests broken by disabling this pinning object.__setattr__(group, "name", name) try: path, res = self._choose_path(fast_path, slow_path, group) @@ -1552,6 +1566,7 @@ def _transform_general(self, func, *args, **kwargs): for name, group in gen: if group.size == 0: continue + # 2023-02-27 No tests broken by disabling this pinning object.__setattr__(group, "name", name) res = path(group) @@ -1721,6 +1736,8 @@ def 
filter(self, func, dropna: bool = True, *args, **kwargs): gen = self.grouper.get_iterator(obj, axis=self.axis) for name, group in gen: + # 2023-02-27 no tests are broken by this pinning, but it is documented in the + # docstring above. object.__setattr__(group, "name", name) res = func(group, *args, **kwargs) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 726d75d705344..9a06a3da28e15 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -760,6 +760,11 @@ def apply_groupwise( zipped = zip(group_keys, splitter) for key, group in zipped: + # Pinning name is needed for + # test_group_apply_once_per_group, + # test_inconsistent_return_type, test_set_group_name, + # test_group_name_available_in_inference_pass, + # test_groupby_multi_timezone object.__setattr__(group, "name", key) # group might be modified diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 4dbdd5e5b77fe..005c166bb1f2a 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -567,10 +567,7 @@ def _homogenize( # Forces alignment. 
No need to copy data since we # are putting it into an ndarray later val = val.reindex(index, copy=False) - if isinstance(val._mgr, SingleBlockManager): - refs.append(val._mgr._block.refs) - else: - refs.append(None) + refs.append(val._references) val = val._values else: if isinstance(val, dict): diff --git a/pandas/core/series.py b/pandas/core/series.py index 06e9611c318cd..03d7b25aca49a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -34,6 +34,7 @@ properties, reshape, ) +from pandas._libs.internals import BlockValuesRefs from pandas._libs.lib import ( is_range_indexer, no_default, @@ -734,6 +735,12 @@ def _values(self): """ return self._mgr.internal_values() + @property + def _references(self) -> BlockValuesRefs | None: + if isinstance(self._mgr, SingleArrayManager): + return None + return self._mgr._block.refs + # error: Decorated property not supported @Appender(base.IndexOpsMixin.array.__doc__) # type: ignore[misc] @property diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 3999fc5840f02..1b9be9adc1196 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -19,8 +19,16 @@ ReadBuffer, WriteBuffer, ) +from pandas.compat import pa_version_under8p0 from pandas.compat._optional import import_optional_dependency +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_interval_dtype, + is_period_dtype, + is_unsigned_integer_dtype, +) + from pandas.core.arrays import ArrowExtensionArray from pandas.core.frame import DataFrame @@ -201,6 +209,20 @@ def to_orc( if engine_kwargs is None: engine_kwargs = {} + # If unsupported dtypes are found raise NotImplementedError + # In Pyarrow 8.0.0 this check will no longer be needed + if pa_version_under8p0: + for dtype in df.dtypes: + if ( + is_categorical_dtype(dtype) + or is_interval_dtype(dtype) + or is_period_dtype(dtype) + or is_unsigned_integer_dtype(dtype) + ): + raise NotImplementedError( + "The dtype of one or more columns is not supported yet." 
+ ) + if engine != "pyarrow": raise ValueError("engine must be 'pyarrow'") engine = import_optional_dependency(engine, min_version="7.0.0") diff --git a/pandas/tests/apply/test_frame_apply_relabeling.py b/pandas/tests/apply/test_frame_apply_relabeling.py index 2da4a78991f5a..95103de857399 100644 --- a/pandas/tests/apply/test_frame_apply_relabeling.py +++ b/pandas/tests/apply/test_frame_apply_relabeling.py @@ -95,3 +95,11 @@ def test_agg_namedtuple(): index=pd.Index(["foo", "bar", "cat"]), ) tm.assert_frame_equal(result, expected) + + +def test_reconstruct_func(): + # GH 28472, test to ensure reconstruct_func isn't moved; + # This method is used by other libraries (e.g. dask) + result = pd.core.apply.reconstruct_func("min") + expected = (False, "min", None, None) + tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e225ff5a0fa43..97e88a8545aa5 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -65,7 +65,7 @@ def test_groupby_std_datetimelike(): @pytest.mark.parametrize("dtype", ["int64", "int32", "float64", "float32"]) -def test_basic(dtype): +def test_basic_aggregations(dtype): data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype) index = np.arange(9) @@ -102,7 +102,13 @@ def test_basic(dtype): grouped.aggregate({"one": np.mean, "two": np.std}) group_constants = {0: 10, 1: 20, 2: 30} - agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) + msg = ( + "Pinning the groupby key to each group in SeriesGroupBy.agg is deprecated, " + "and cases that relied on it will raise in a future version" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#41090 + agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) assert agged[1] == 21 # corner cases diff --git a/pandas/util/version/__init__.py b/pandas/util/version/__init__.py index 0b5e1d149daaa..9914263e479c5 100644 --- 
a/pandas/util/version/__init__.py +++ b/pandas/util/version/__init__.py @@ -253,16 +253,16 @@ def is_devrelease(self) -> bool: def _parse_version_parts(s: str) -> Iterator[str]: for part in _legacy_version_component_re.split(s): - part = _legacy_version_replacement_map.get(part, part) + part_y = _legacy_version_replacement_map.get(part, part) - if not part or part == ".": + if not part_y or part_y == ".": continue - if part[:1] in "0123456789": + if part_y[:1] in "0123456789": # pad for numeric comparison - yield part.zfill(8) + yield part_y.zfill(8) else: - yield "*" + part + yield "*" + part_y # ensure that alpha/beta/candidate are before final yield "*final" diff --git a/pyproject.toml b/pyproject.toml index b7e4a3cf2416f..f33b002a24716 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -271,8 +271,6 @@ ignore = [ "B904", # Magic number "PLR2004", - # Outer loop variable overwritten by inner assignment - "PLW2901", # Consider `elif` instead of `else` then `if` to remove indendation level "PLR5501", ] @@ -291,6 +289,7 @@ exclude = [ # relative imports allowed for asv_bench "asv_bench/*" = ["TID"] # TCH to be enabled gradually +"pandas/core/*" = ["PLW2901"] "pandas/core/arrays/*" = ["TCH"] "pandas/core/io/*" = ["TCH"] "pandas/core/indexers/*" = ["TCH"] @@ -318,10 +317,10 @@ exclude = [ "pandas/core/strings/*" = ["TCH"] "pandas/core/tools/*" = ["TCH"] "pandas/core/window/*" = ["TCH"] -"pandas/io/*" = ["TCH"] +"pandas/io/*" = ["TCH", "PLW2901"] "pandas/tseries/*" = ["TCH"] -"pandas/tests/*" = ["TCH"] -"pandas/plotting/*" = ["TCH"] +"pandas/tests/*" = ["TCH", "PLW2901"] +"pandas/plotting/*" = ["TCH", "PLW2901"] "pandas/util/*" = ["TCH"] "pandas/_libs/*" = ["TCH"] # Keep this one enabled diff --git a/scripts/no_bool_in_generic.py b/scripts/no_bool_in_generic.py index 92e2c0983b25b..4314dbb99be06 100644 --- a/scripts/no_bool_in_generic.py +++ b/scripts/no_bool_in_generic.py @@ -52,8 +52,8 @@ def replace_bool_with_bool_t(to_replace, content: str) -> str: for n, line in 
enumerate(content.splitlines(), start=1): if n in to_replace: for col_offset in reversed(to_replace[n]): - line = line[:col_offset] + "bool_t" + line[col_offset + 4 :] - new_lines.append(line) + line_new = line[:col_offset] + "bool_t" + line[col_offset + 4 :] + new_lines.append(line_new) return "\n".join(new_lines) diff --git a/scripts/sort_whatsnew_note.py b/scripts/sort_whatsnew_note.py new file mode 100644 index 0000000000000..e4ab44984b0d8 --- /dev/null +++ b/scripts/sort_whatsnew_note.py @@ -0,0 +1,76 @@ +""" +Sort whatsnew note blocks by issue number. + +NOTE: this assumes that each entry is on its own line, and ends with an issue number. +If that's not the case, then an entry might not get sorted. However, virtually all +recent-enough whatsnew entries follow this pattern. So, although not perfect, this +script should be good enough to significantly reduce merge conflicts. + +For example: + +- Fixed bug in resample (:issue:`321`) +- Fixed bug in groupby (:issue:`123`) + +would become + +- Fixed bug in groupby (:issue:`123`) +- Fixed bug in resample (:issue:`321`) + +The motivation is to reduce merge conflicts by reducing the chances that multiple +contributors will edit the same line of code. 
+ + You can run this manually with + + pre-commit run sort-whatsnew-items --all-files +""" +from __future__ import annotations + +import argparse +import re +import sys +from typing import Sequence + +pattern = re.compile(r"\(:issue:`(\d+)`\)\n$") + + +def sort_whatsnew_note(content: str) -> str: + new_lines = [] + block: list[str] = [] + lines = content.splitlines(keepends=True) + for line in lines: + if line.startswith("- ") and pattern.search(line) is not None: + block.append(line) + else: + key = lambda x: int(pattern.search(x).group(1)) + block = sorted(block, key=key) + new_lines.extend(block) + new_lines.append(line) + block = [] + if sorted(new_lines) != sorted(lines): # pragma: no cover + # Defensive check - this script should only reorder lines, not modify any + # content. + raise AssertionError( + "Script modified content of file. Something is wrong, please don't " + "trust it." + ) + return "".join(new_lines) + + +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument("paths", nargs="*") + args = parser.parse_args(argv) + ret = 0 + for path in args.paths: + with open(path) as fd: + content = fd.read() + new_content = sort_whatsnew_note(content) + if content != new_content: + ret |= 1 + with open(path, "w") as fd: + fd.write(new_content) + return ret + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/tests/test_sort_whatsnew_note.py b/scripts/tests/test_sort_whatsnew_note.py new file mode 100644 index 0000000000000..6e40f6814c402 --- /dev/null +++ b/scripts/tests/test_sort_whatsnew_note.py @@ -0,0 +1,30 @@ +from scripts.sort_whatsnew_note import sort_whatsnew_note + + +def test_sort_whatsnew_note(): + content = ( + ".. 
_whatsnew_200:\n" + "\n" + "What's new in 2.0.0 (March XX, 2023)\n" + "------------------------------------\n" + "\n" + "Timedelta\n" + "^^^^^^^^^\n" + "- Bug in :class:`TimedeltaIndex` (:issue:`51575`)\n" + "- Bug in :meth:`Timedelta.round` (:issue:`51494`)\n" + "\n" + ) + expected = ( + ".. _whatsnew_200:\n" + "\n" + "What's new in 2.0.0 (March XX, 2023)\n" + "------------------------------------\n" + "\n" + "Timedelta\n" + "^^^^^^^^^\n" + "- Bug in :meth:`Timedelta.round` (:issue:`51494`)\n" + "- Bug in :class:`TimedeltaIndex` (:issue:`51575`)\n" + "\n" + ) + result = sort_whatsnew_note(content) + assert result == expected diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index a0eb02d069c58..f80afbbce5cfd 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -103,33 +103,33 @@ def get_api_items(api_doc_fd): previous_line = current_section = current_subsection = "" position = None for line in api_doc_fd: - line = line.strip() - if len(line) == len(previous_line): - if set(line) == set("-"): + line_strip = line.strip() + if len(line_strip) == len(previous_line): + if set(line_strip) == set("-"): current_section = previous_line continue - if set(line) == set("~"): + if set(line_strip) == set("~"): current_subsection = previous_line continue - if line.startswith(".. currentmodule::"): - current_module = line.replace(".. currentmodule::", "").strip() + if line_strip.startswith(".. currentmodule::"): + current_module = line_strip.replace(".. currentmodule::", "").strip() continue - if line == ".. autosummary::": + if line_strip == ".. 
autosummary::": position = "autosummary" continue if position == "autosummary": - if line == "": + if line_strip == "": position = "items" continue if position == "items": - if line == "": + if line_strip == "": position = None continue - item = line.strip() + item = line_strip.strip() if item in IGNORE_VALIDATION: continue func = importlib.import_module(current_module) @@ -143,7 +143,7 @@ def get_api_items(api_doc_fd): current_subsection, ) - previous_line = line + previous_line = line_strip class PandasDocstring(Validator): diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py index 3c12f17fe72cf..e0d757506a1c5 100755 --- a/scripts/validate_min_versions_in_sync.py +++ b/scripts/validate_min_versions_in_sync.py @@ -181,11 +181,11 @@ def pin_min_versions_to_yaml_file( data = data.replace(old_dep, new_dep, 1) continue toml_version = version.parse(min_dep) - yaml_versions = clean_version_list(yaml_versions, toml_version) - cleaned_yaml_versions = [x for x in yaml_versions if "-" not in x] + yaml_versions_lst = clean_version_list(yaml_versions, toml_version) + cleaned_yaml_versions = [x for x in yaml_versions_lst if "-" not in x] new_dep = yaml_package - for yaml_version in cleaned_yaml_versions: - new_dep += yaml_version + ", " + for clean_yaml_version in cleaned_yaml_versions: + new_dep += clean_yaml_version + ", " operator = get_operator_from(new_dep) if operator != "=": new_dep += ">=" + min_dep diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py index 44d59aecde718..933f42a9864f0 100755 --- a/scripts/validate_rst_title_capitalization.py +++ b/scripts/validate_rst_title_capitalization.py @@ -229,15 +229,15 @@ def find_titles(rst_file: str) -> Iterable[tuple[str, int]]: with open(rst_file) as fd: previous_line = "" for i, line in enumerate(fd): - line = line[:-1] - line_chars = set(line) + no_end_line = line[:-1] + line_chars = set(no_end_line) if ( len(line_chars) == 1 
and line_chars.pop() in symbols - and len(line) == len(previous_line) + and len(no_end_line) == len(previous_line) ): yield re.sub(r"[`\*_]", "", previous_line), i - previous_line = line + previous_line = no_end_line def main(source_paths: list[str]) -> int: diff --git a/setup.py b/setup.py index b6dfcc5fbdb0d..1bbb11669c5c8 100755 --- a/setup.py +++ b/setup.py @@ -158,8 +158,8 @@ def initialize_options(self): # clean the generated pxi files for pxifile in _pxifiles: - pxifile = pxifile.replace(".pxi.in", ".pxi") - self._clean_me.append(pxifile) + pxifile_new = pxifile.replace(".pxi.in", ".pxi") + self._clean_me.append(pxifile_new) for d in ("build", "dist"): if os.path.exists(d): diff --git a/web/pandas_web.py b/web/pandas_web.py index d1f06b6fdfa43..54506baa0301d 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -369,9 +369,9 @@ def get_source_files(source_path: str) -> typing.Generator[str, None, None]: Generate the list of files present in the source directory. """ for root, dirs, fnames in os.walk(source_path): - root = os.path.relpath(root, source_path) + root_new = os.path.relpath(root, source_path) for fname in fnames: - yield os.path.join(root, fname) + yield os.path.join(root_new, fname) def extend_base_template(content: str, base_template: str) -> str: @@ -430,8 +430,8 @@ def main( content = extend_base_template(body, context["main"]["base_template"]) context["base_url"] = "".join(["../"] * os.path.normpath(fname).count("/")) content = jinja_env.from_string(content).render(**context) - fname = os.path.splitext(fname)[0] + ".html" - with open(os.path.join(target_path, fname), "w") as f: + fname_split = os.path.splitext(fname)[0] + ".html" + with open(os.path.join(target_path, fname_split), "w") as f: f.write(content) else: shutil.copy( From 75b087820cae44f80a4b62a967eb87bce0bbbed6 Mon Sep 17 00:00:00 2001 From: ErdiTk <24619655+ErdiTk@users.noreply.github.com> Date: Mon, 6 Mar 2023 23:00:01 +0100 Subject: [PATCH 2/5] Variables renamed in a more 
intuitive manner. --- doc/source/conf.py | 12 +++++------ pandas/_config/config.py | 4 ++-- pandas/util/version/__init__.py | 10 ++++----- scripts/no_bool_in_generic.py | 4 ++-- scripts/validate_docstrings.py | 22 ++++++++++---------- scripts/validate_min_versions_in_sync.py | 4 ++-- scripts/validate_rst_title_capitalization.py | 8 +++---- setup.py | 4 ++-- web/pandas_web.py | 8 +++---- 9 files changed, 38 insertions(+), 38 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 72b1e7f775d8f..c73a91aa90365 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -101,20 +101,20 @@ reldir = os.path.relpath(dirname, source_path) for fname in fnames: if os.path.splitext(fname)[-1] in (".rst", ".ipynb"): - fname_new = os.path.relpath(os.path.join(dirname, fname), source_path) + rel_fname = os.path.relpath(os.path.join(dirname, fname), source_path) - if fname_new == "index.rst" and os.path.abspath(dirname) == source_path: + if rel_fname == "index.rst" and os.path.abspath(dirname) == source_path: continue if pattern == "-api" and reldir.startswith("reference"): - exclude_patterns.append(fname_new) + exclude_patterns.append(rel_fname) elif ( pattern == "whatsnew" and not reldir.startswith("reference") and reldir != "whatsnew" ): - exclude_patterns.append(fname_new) - elif single_doc and fname_new != pattern: - exclude_patterns.append(fname_new) + exclude_patterns.append(rel_fname) + elif single_doc and rel_fname != pattern: + exclude_patterns.append(rel_fname) with open(os.path.join(source_path, "index.rst.template")) as f: t = jinja2.Template(f.read()) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 2dfabb31aa2dd..56d505d024949 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -158,8 +158,8 @@ def _set_option(*args, **kwargs) -> None: o.validator(v) # walk the nested dict - root, k_n = _get_root(key) - root[k_n] = v + root, k_root = _get_root(key) + root[k_root] = v if o.cb: if silent: diff --git 
a/pandas/util/version/__init__.py b/pandas/util/version/__init__.py index 9914263e479c5..ea0047f6cfd77 100644 --- a/pandas/util/version/__init__.py +++ b/pandas/util/version/__init__.py @@ -253,16 +253,16 @@ def is_devrelease(self) -> bool: def _parse_version_parts(s: str) -> Iterator[str]: for part in _legacy_version_component_re.split(s): - part_y = _legacy_version_replacement_map.get(part, part) + mapped_part = _legacy_version_replacement_map.get(part, part) - if not part_y or part_y == ".": + if not mapped_part or mapped_part == ".": continue - if part_y[:1] in "0123456789": + if mapped_part[:1] in "0123456789": # pad for numeric comparison - yield part_y.zfill(8) + yield mapped_part.zfill(8) else: - yield "*" + part_y + yield "*" + mapped_part # ensure that alpha/beta/candidate are before final yield "*final" diff --git a/scripts/no_bool_in_generic.py b/scripts/no_bool_in_generic.py index 4314dbb99be06..1cd5fe6996719 100644 --- a/scripts/no_bool_in_generic.py +++ b/scripts/no_bool_in_generic.py @@ -52,8 +52,8 @@ def replace_bool_with_bool_t(to_replace, content: str) -> str: for n, line in enumerate(content.splitlines(), start=1): if n in to_replace: for col_offset in reversed(to_replace[n]): - line_new = line[:col_offset] + "bool_t" + line[col_offset + 4 :] - new_lines.append(line_new) + line_bool_t = line[:col_offset] + "bool_t" + line[col_offset + 4 :] + new_lines.append(line_bool_t) return "\n".join(new_lines) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index f80afbbce5cfd..487fe44e4d9bc 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -103,33 +103,33 @@ def get_api_items(api_doc_fd): previous_line = current_section = current_subsection = "" position = None for line in api_doc_fd: - line_strip = line.strip() - if len(line_strip) == len(previous_line): - if set(line_strip) == set("-"): + line_stripped = line.strip() + if len(line_stripped) == len(previous_line): + if set(line_stripped) == 
set("-"): current_section = previous_line continue - if set(line_strip) == set("~"): + if set(line_stripped) == set("~"): current_subsection = previous_line continue - if line_strip.startswith(".. currentmodule::"): - current_module = line_strip.replace(".. currentmodule::", "").strip() + if line_stripped.startswith(".. currentmodule::"): + current_module = line_stripped.replace(".. currentmodule::", "").strip() continue - if line_strip == ".. autosummary::": + if line_stripped == ".. autosummary::": position = "autosummary" continue if position == "autosummary": - if line_strip == "": + if line_stripped == "": position = "items" continue if position == "items": - if line_strip == "": + if line_stripped == "": position = None continue - item = line_strip.strip() + item = line_stripped.strip() if item in IGNORE_VALIDATION: continue func = importlib.import_module(current_module) @@ -143,7 +143,7 @@ def get_api_items(api_doc_fd): current_subsection, ) - previous_line = line_strip + previous_line = line_stripped class PandasDocstring(Validator): diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py index e0d757506a1c5..637ddf68995b4 100755 --- a/scripts/validate_min_versions_in_sync.py +++ b/scripts/validate_min_versions_in_sync.py @@ -181,8 +181,8 @@ def pin_min_versions_to_yaml_file( data = data.replace(old_dep, new_dep, 1) continue toml_version = version.parse(min_dep) - yaml_versions_lst = clean_version_list(yaml_versions, toml_version) - cleaned_yaml_versions = [x for x in yaml_versions_lst if "-" not in x] + yaml_versions_list = clean_version_list(yaml_versions, toml_version) + cleaned_yaml_versions = [x for x in yaml_versions_list if "-" not in x] new_dep = yaml_package for clean_yaml_version in cleaned_yaml_versions: new_dep += clean_yaml_version + ", " diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py index 933f42a9864f0..4446ed62f6b8a 100755 --- 
a/scripts/validate_rst_title_capitalization.py +++ b/scripts/validate_rst_title_capitalization.py @@ -229,15 +229,15 @@ def find_titles(rst_file: str) -> Iterable[tuple[str, int]]: with open(rst_file) as fd: previous_line = "" for i, line in enumerate(fd): - no_end_line = line[:-1] - line_chars = set(no_end_line) + line_no_last_elem = line[:-1] + line_chars = set(line_no_last_elem) if ( len(line_chars) == 1 and line_chars.pop() in symbols - and len(no_end_line) == len(previous_line) + and len(line_no_last_elem) == len(previous_line) ): yield re.sub(r"[`\*_]", "", previous_line), i - previous_line = no_end_line + previous_line = line_no_last_elem def main(source_paths: list[str]) -> int: diff --git a/setup.py b/setup.py index 1bbb11669c5c8..0c646469700da 100755 --- a/setup.py +++ b/setup.py @@ -158,8 +158,8 @@ def initialize_options(self): # clean the generated pxi files for pxifile in _pxifiles: - pxifile_new = pxifile.replace(".pxi.in", ".pxi") - self._clean_me.append(pxifile_new) + pxifile_replaced = pxifile.replace(".pxi.in", ".pxi") + self._clean_me.append(pxifile_replaced) for d in ("build", "dist"): if os.path.exists(d): diff --git a/web/pandas_web.py b/web/pandas_web.py index 54506baa0301d..13971f81bd8f3 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -369,9 +369,9 @@ def get_source_files(source_path: str) -> typing.Generator[str, None, None]: Generate the list of files present in the source directory. 
""" for root, dirs, fnames in os.walk(source_path): - root_new = os.path.relpath(root, source_path) + root_rel_path = os.path.relpath(root, source_path) for fname in fnames: - yield os.path.join(root_new, fname) + yield os.path.join(root_rel_path, fname) def extend_base_template(content: str, base_template: str) -> str: @@ -430,8 +430,8 @@ def main( content = extend_base_template(body, context["main"]["base_template"]) context["base_url"] = "".join(["../"] * os.path.normpath(fname).count("/")) content = jinja_env.from_string(content).render(**context) - fname_split = os.path.splitext(fname)[0] + ".html" - with open(os.path.join(target_path, fname_split), "w") as f: + fname_splitted = os.path.splitext(fname)[0] + ".html" + with open(os.path.join(target_path, fname_splitted), "w") as f: f.write(content) else: shutil.copy( From b11ad01e9996a25ba34b8258c664f98300f60458 Mon Sep 17 00:00:00 2001 From: ErdiTk <24619655+ErdiTk@users.noreply.github.com> Date: Tue, 7 Mar 2023 22:03:19 +0100 Subject: [PATCH 3/5] Renamed fname_splitted to fname_html in web/pandas_web.py --- web/pandas_web.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/pandas_web.py b/web/pandas_web.py index 13971f81bd8f3..5e902f1b1919b 100755 --- a/web/pandas_web.py +++ b/web/pandas_web.py @@ -430,8 +430,8 @@ def main( content = extend_base_template(body, context["main"]["base_template"]) context["base_url"] = "".join(["../"] * os.path.normpath(fname).count("/")) content = jinja_env.from_string(content).render(**context) - fname_splitted = os.path.splitext(fname)[0] + ".html" - with open(os.path.join(target_path, fname_splitted), "w") as f: + fname_html = os.path.splitext(fname)[0] + ".html" + with open(os.path.join(target_path, fname_html), "w") as f: f.write(content) else: shutil.copy( From 9ac4fab1546f8b32613a29d06f05fbddd861aadc Mon Sep 17 00:00:00 2001 From: ErdiTk <24619655+ErdiTk@users.noreply.github.com> Date: Tue, 7 Mar 2023 23:28:09 +0100 Subject: [PATCH 4/5] Removed 
duplicate pandas/core/* --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fb01db5912d77..5e683870ff3bb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -289,9 +289,8 @@ exclude = [ # relative imports allowed for asv_bench "asv_bench/*" = ["TID"] # to be enabled gradually -"pandas/core/*" = ["PLR5501"] +"pandas/core/*" = ["PLR5501", "PLW2901"] # TCH to be enabled gradually -"pandas/core/*" = ["PLW2901"] "pandas/core/arrays/*" = ["TCH"] "pandas/core/nanops.py" = ["TCH"] "pandas/core/apply.py" = ["TCH"] From 4b3addf958fb5e7b87866febf99417f6484c8084 Mon Sep 17 00:00:00 2001 From: ErdiTk <24619655+ErdiTk@users.noreply.github.com> Date: Wed, 8 Mar 2023 22:38:19 +0100 Subject: [PATCH 5/5] Adjusted replace_bool_with_bool_t from previously introduced bug. --- scripts/no_bool_in_generic.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/no_bool_in_generic.py b/scripts/no_bool_in_generic.py index 1cd5fe6996719..1427974249702 100644 --- a/scripts/no_bool_in_generic.py +++ b/scripts/no_bool_in_generic.py @@ -50,10 +50,15 @@ def replace_bool_with_bool_t(to_replace, content: str) -> str: new_lines = [] for n, line in enumerate(content.splitlines(), start=1): + replaced_line = line if n in to_replace: for col_offset in reversed(to_replace[n]): - line_bool_t = line[:col_offset] + "bool_t" + line[col_offset + 4 :] - new_lines.append(line_bool_t) + replaced_line = ( + replaced_line[:col_offset] + + "bool_t" + + replaced_line[col_offset + 4 :] + ) + new_lines.append(replaced_line) return "\n".join(new_lines)