diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e0468aa8137a2..484107af678a7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -436,3 +436,10 @@ repos: types: [python] files: ^pandas/tests language: python + - id: sort-whatsnew-items + name: sort whatsnew entries by issue number + entry: python -m scripts.sort_whatsnew_note + types: [rst] + language: python + files: ^doc/source/whatsnew/v + exclude: ^doc/source/whatsnew/v(0|1|2\.0\.0) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index dd27fd9e128ae..68159cd211a5e 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -100,12 +100,12 @@ Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Performance improvement in :meth:`DataFrame.where` when ``cond`` is backed by an extension dtype (:issue:`51574`) -- Performance improvement in :meth:`~arrays.ArrowExtensionArray.isna` when array has zero nulls or is all nulls (:issue:`51630`) -- Performance improvement in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` for extension array dtypes (:issue:`51549`) -- Performance improvement in :meth:`DataFrame.clip` and :meth:`Series.clip` (:issue:`51472`) - Performance improvement in :func:`read_parquet` on string columns when using ``use_nullable_dtypes=True`` (:issue:`47345`) +- Performance improvement in :meth:`DataFrame.clip` and :meth:`Series.clip` (:issue:`51472`) +- Performance improvement in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` for extension array dtypes (:issue:`51549`) +- Performance improvement in :meth:`DataFrame.where` when ``cond`` is backed by an extension dtype (:issue:`51574`) - Performance improvement in :meth:`read_orc` when reading a remote URI file path. (:issue:`51609`) +- Performance improvement in :meth:`~arrays.ArrowExtensionArray.isna` when array has zero nulls or is all nulls (:issue:`51630`) .. 
"""
Sort whatsnew note blocks by issue number.

NOTE: this assumes that each entry is on its own line, and ends with an issue number.
If that's not the case, then an entry might not get sorted. However, virtually all
recent-enough whatsnew entries follow this pattern. So, although not perfect, this
script should be good enough to significantly reduce merge conflicts.

For example:

- Fixed bug in resample (:issue:`321`)
- Fixed bug in groupby (:issue:`123`)

would become

- Fixed bug in groupby (:issue:`123`)
- Fixed bug in resample (:issue:`321`)

The motivation is to reduce merge conflicts by reducing the chances that multiple
contributors will edit the same line of code.

You can run this manually with

    pre-commit run sort-whatsnew-items --all-files
"""
import argparse
import re
import sys
from typing import (
    Optional,
    Sequence,
)

# Matches an ``(:issue:`NNNN`)`` reference that is immediately followed by the
# line's terminating newline, capturing the issue number.
pattern = re.compile(r"\(:issue:`(\d+)`\)\n$")


def _issue_number(line: str) -> int:
    """
    Return the issue number that ends ``line``.

    Only called on lines that ``pattern`` has already matched, so a failed
    search here indicates a programming error rather than bad input.
    """
    match = pattern.search(line)
    if match is None:
        raise ValueError(f"line does not end with an issue reference: {line!r}")
    return int(match.group(1))


def sort_whatsnew_note(content: str) -> str:
    """
    Sort each run of consecutive whatsnew entries by issue number.

    An entry is a line that starts with ``"- "`` and ends with
    ``(:issue:`NNNN`)`` followed by a newline. Consecutive entries form a
    block; every other line ends the current block and is emitted unchanged.
    A final line without a trailing newline is never treated as an entry.

    Parameters
    ----------
    content : str
        Full text of a whatsnew file.

    Returns
    -------
    str
        ``content`` with the entries of every block ordered by ascending issue
        number. The sort is stable, so entries sharing an issue number keep
        their relative order.

    Raises
    ------
    AssertionError
        If the output is not a pure reordering of the input lines.
    """
    lines = content.splitlines(keepends=True)
    new_lines: list[str] = []
    block: list[str] = []
    for line in lines:
        if line.startswith("- ") and pattern.search(line) is not None:
            block.append(line)
            continue
        # A non-entry line closes the current block: emit it sorted, then the
        # line itself.
        new_lines.extend(sorted(block, key=_issue_number))
        new_lines.append(line)
        block = []
    # Flush a block that runs up to the end of the file; without this, trailing
    # entries would be dropped and the check below would fail.
    new_lines.extend(sorted(block, key=_issue_number))
    if sorted(new_lines) != sorted(lines):  # pragma: no cover
        # Defensive check - this script should only reorder lines, not modify any
        # content.
        raise AssertionError(
            "Script modified content of file. Something is wrong, please don't "
            "trust it."
        )
    return "".join(new_lines)


def main(argv: Optional[Sequence[str]] = None) -> int:
    """
    Sort the whatsnew entries of every given file in place.

    Parameters
    ----------
    argv : sequence of str, optional
        Command-line arguments (file paths). ``None`` lets ``argparse`` read
        them from ``sys.argv``.

    Returns
    -------
    int
        ``1`` if at least one file was rewritten, otherwise ``0``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("paths", nargs="*")
    args = parser.parse_args(argv)
    ret = 0
    for path in args.paths:
        # Explicit encoding so the result does not depend on the locale.
        with open(path, encoding="utf-8") as fd:
            content = fd.read()
        new_content = sort_whatsnew_note(content)
        if content != new_content:
            ret |= 1
            with open(path, "w", encoding="utf-8") as fd:
                fd.write(new_content)
    return ret


if __name__ == "__main__":
    sys.exit(main())


# ---------------------------------------------------------------------------
# Test shipped in the same patch as scripts/tests/test_sort_whatsnew_note.py,
# where ``sort_whatsnew_note`` is imported from ``scripts.sort_whatsnew_note``.
def test_sort_whatsnew_note():
    content = (
        ".. _whatsnew_200:\n"
        "\n"
        "What's new in 2.0.0 (March XX, 2023)\n"
        "------------------------------------\n"
        "\n"
        "Timedelta\n"
        "^^^^^^^^^\n"
        "- Bug in :class:`TimedeltaIndex` (:issue:`51575`)\n"
        "- Bug in :meth:`Timedelta.round` (:issue:`51494`)\n"
        "\n"
    )
    expected = (
        ".. _whatsnew_200:\n"
        "\n"
        "What's new in 2.0.0 (March XX, 2023)\n"
        "------------------------------------\n"
        "\n"
        "Timedelta\n"
        "^^^^^^^^^\n"
        "- Bug in :meth:`Timedelta.round` (:issue:`51494`)\n"
        "- Bug in :class:`TimedeltaIndex` (:issue:`51575`)\n"
        "\n"
    )
    result = sort_whatsnew_note(content)
    assert result == expected