diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c63f50b3c1421..057457e6d183e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -90,11 +90,6 @@ repos: entry: python scripts/check_for_inconsistent_pandas_namespace.py language: python types: [python] - - id: incorrect-code-directives - name: Check for incorrect code block or IPython directives - language: pygrep - entry: (\.\. code-block ::|\.\. ipython ::) - types_or: [python, cython, rst] - id: no-os-remove name: Check code for instances of os.remove entry: os\.remove @@ -106,49 +101,60 @@ repos: pandas/tests/io/excel/test_writers\.py |pandas/tests/io/pytables/common\.py |pandas/tests/io/pytables/test_store\.py$ - - id: non-standard-imports - name: Check for non-standard imports + - id: unwanted-patterns + name: Unwanted patterns language: pygrep entry: | (?x) - # Check for imports from pandas.core.common instead of `import pandas.core.common as com` - from\ pandas\.core\.common\ import + # outdated annotation syntax, missing error codes + \#\ type:\ (?!ignore) + |\#\ type:\s?ignore(?!\[) + + # foo.__class__ instead of type(foo) + |\.__class__ + + # np.bool/np.object instead of np.bool_/np.object_ + |np\.bool[^_8] + |np\.object[^_8] + + # imports from pandas.core.common instead of `import pandas.core.common as com` + |from\ pandas\.core\.common\ import |from\ pandas\.core\ import\ common - # Check for imports from collections.abc instead of `from collections import abc` + # imports from collections.abc instead of `from collections import abc` |from\ collections\.abc\ import # Numpy |from\ numpy\ import\ random |from\ numpy\.random\ import - types: [python] - - id: non-standard-imports-in-tests - name: Check for non-standard imports in test suite + + # Incorrect code-block / IPython directives + |\.\.\ code-block\ :: + |\.\.\ ipython\ :: + types_or: [python, cython, rst] + exclude: ^doc/source/development/code_style\.rst # contains examples of patterns to avoid + - id: 
unwanted-patterns-in-tests + name: Unwanted patterns in tests language: pygrep entry: | (?x) - # Check for imports from pandas._testing instead of `import pandas._testing as tm` - from\ pandas\._testing\ import + # pytest.xfail instead of pytest.mark.xfail + pytest\.xfail + + # imports from pandas._testing instead of `import pandas._testing as tm` + |from\ pandas\._testing\ import |from\ pandas\ import\ _testing\ as\ tm # No direct imports from conftest |conftest\ import |import\ conftest - # Check for use of pandas.testing instead of tm + # pandas.testing instead of tm |pd\.testing\. - # Check for pd.api.types instead of from pandas.api.types import ... + # pd.api.types instead of from pandas.api.types import ... |(pd|pandas)\.api\.types\. - types: [python] files: ^pandas/tests/ - - id: np-bool-and-np-object - name: Check for use of np.bool/np.object instead of np.bool_/np.object_ - entry: | - (?x) - np\.bool[^_8] - |np\.object[^_8] - language: pygrep types_or: [python, cython, rst] - id: pip-to-conda name: Generate pip dependency from conda @@ -164,11 +170,6 @@ repos: language: python types: [rst] files: ^doc/source/(development|reference)/ - - id: type-not-class - name: Check for use of foo.__class__ instead of type(foo) - entry: \.__class__ - language: pygrep - types_or: [python, cython] - id: unwanted-patterns-bare-pytest-raises name: Check for use of bare pytest raises language: python @@ -188,12 +189,6 @@ repos: entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" types: [python] exclude: ^(asv_bench|pandas/tests|doc)/ - - id: unwanted-patterns-pytest-xfail - name: Check for use of pytest.xfail - entry: pytest\.xfail - language: pygrep - types: [python] - files: ^pandas/tests/ - id: unwanted-patterns-strings-to-concatenate name: Check for use of not concatenated strings language: python @@ -204,14 +199,6 @@ repos: language: python entry: python scripts/validate_unwanted_patterns.py 
--validation-type="strings_with_wrong_placed_whitespace" types_or: [python, cython] - - id: unwanted-typing - name: Check for outdated annotation syntax and missing error codes - entry: | - (?x) - \#\ type:\ (?!ignore) - |\#\ type:\s?ignore(?!\[) - language: pygrep - types: [python] - id: use-pd_array-in-core name: Import pandas.array as pd_array in core language: python diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 4b68717763d87..d3273d7f13a4d 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -336,7 +336,7 @@ def get_iterator( """ splitter = self._get_splitter(data, axis=axis) keys = self._get_group_keys() - for key, (i, group) in zip(keys, splitter): + for key, group in zip(keys, splitter): yield key, group.__finalize__(data, method="groupby") @final @@ -411,21 +411,27 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0): if len(result_values) == len(group_keys): return group_keys, result_values, mutated - for key, (i, group) in zip(group_keys, splitter): - object.__setattr__(group, "name", key) - + if result_values is None: # result_values is None if fast apply path wasn't taken # or fast apply aborted with an unexpected exception. # In either case, initialize the result list and perform # the slow iteration. - if result_values is None: - result_values = [] - + result_values = [] + skip_first = False + else: # If result_values is not None we're in the case that the # fast apply loop was broken prematurely but we have # already the result for the first group which we can reuse. 
- elif i == 0: - continue + skip_first = True + + # This calls DataSplitter.__iter__ + zipped = zip(group_keys, splitter) + if skip_first: + # pop the first item from the front of the iterator + next(zipped) + + for key, group in zipped: + object.__setattr__(group, "name", key) # group might be modified group_axes = group.axes @@ -779,7 +785,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F): splitter = get_splitter(obj, group_index, ngroups, axis=0) - for label, group in splitter: + for label, group in enumerate(splitter): # Each step of this loop corresponds to # libreduction._BaseGrouper._apply_to_group @@ -1012,8 +1018,8 @@ def __iter__(self): starts, ends = lib.generate_slices(self.slabels, self.ngroups) - for i, (start, end) in enumerate(zip(starts, ends)): - yield i, self._chop(sdata, slice(start, end)) + for start, end in zip(starts, ends): + yield self._chop(sdata, slice(start, end)) @cache_readonly def sorted_data(self) -> FrameOrSeries: