
Commit f204080

Merge pull request #165 from pandas-dev/master

Sync Fork from Upstream Repo

2 parents: c56c9d2 + 77ea98e

File tree: 2 files changed, +49 -56 lines

.pre-commit-config.yaml  (+31, -44)

@@ -90,11 +90,6 @@ repos:
         entry: python scripts/check_for_inconsistent_pandas_namespace.py
         language: python
         types: [python]
-    -   id: incorrect-code-directives
-        name: Check for incorrect code block or IPython directives
-        language: pygrep
-        entry: (\.\. code-block ::|\.\. ipython ::)
-        types_or: [python, cython, rst]
     -   id: no-os-remove
         name: Check code for instances of os.remove
         entry: os\.remove
@@ -106,49 +101,60 @@ repos:
             pandas/tests/io/excel/test_writers\.py
             |pandas/tests/io/pytables/common\.py
             |pandas/tests/io/pytables/test_store\.py$
-    -   id: non-standard-imports
-        name: Check for non-standard imports
+    -   id: unwanted-patterns
+        name: Unwanted patterns
         language: pygrep
         entry: |
             (?x)
-            # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
-            from\ pandas\.core\.common\ import
+            # outdated annotation syntax, missing error codes
+            \#\ type:\ (?!ignore)
+            |\#\ type:\s?ignore(?!\[)
+
+            # foo._class__ instead of type(foo)
+            |\.__class__
+
+            # np.bool/np.object instead of np.bool_/np.object_
+            |np\.bool[^_8]
+            |np\.object[^_8]
+
+            # imports from pandas.core.common instead of `import pandas.core.common as com`
+            |from\ pandas\.core\.common\ import
             |from\ pandas\.core\ import\ common

-            # Check for imports from collections.abc instead of `from collections import abc`
+            # imports from collections.abc instead of `from collections import abc`
             |from\ collections\.abc\ import

             # Numpy
             |from\ numpy\ import\ random
             |from\ numpy\.random\ import
-        types: [python]
-    -   id: non-standard-imports-in-tests
-        name: Check for non-standard imports in test suite
+
+            # Incorrect code-block / IPython directives
+            |\.\.\ code-block\ ::
+            |\.\.\ ipython\ ::
+        types_or: [python, cython, rst]
+        exclude: ^doc/source/development/code_style\.rst # contains examples of patterns to avoid
+    -   id: unwanted-patterns-in-tests
+        name: Unwanted patterns in tests
         language: pygrep
         entry: |
             (?x)
-            # Check for imports from pandas._testing instead of `import pandas._testing as tm`
-            from\ pandas\._testing\ import
+            # pytest.xfail instead of pytest.mark.xfail
+            pytest\.xfail
+
+            # imports from pandas._testing instead of `import pandas._testing as tm`
+            |from\ pandas\._testing\ import
             |from\ pandas\ import\ _testing\ as\ tm

             # No direct imports from conftest
             |conftest\ import
             |import\ conftest

-            # Check for use of pandas.testing instead of tm
+            # pandas.testing instead of tm
             |pd\.testing\.

-            # Check for pd.api.types instead of from pandas.api.types import ...
+            # pd.api.types instead of from pandas.api.types import ...
             |(pd|pandas)\.api\.types\.
-        types: [python]
         files: ^pandas/tests/
-    -   id: np-bool-and-np-object
-        name: Check for use of np.bool/np.object instead of np.bool_/np.object_
-        entry: |
-            (?x)
-            np\.bool[^_8]
-            |np\.object[^_8]
-        language: pygrep
         types_or: [python, cython, rst]
     -   id: pip-to-conda
         name: Generate pip dependency from conda
@@ -164,11 +170,6 @@ repos:
         language: python
         types: [rst]
         files: ^doc/source/(development|reference)/
-    -   id: type-not-class
-        name: Check for use of foo.__class__ instead of type(foo)
-        entry: \.__class__
-        language: pygrep
-        types_or: [python, cython]
     -   id: unwanted-patterns-bare-pytest-raises
         name: Check for use of bare pytest raises
         language: python
@@ -188,12 +189,6 @@ repos:
         entry: python scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module"
         types: [python]
         exclude: ^(asv_bench|pandas/tests|doc)/
-    -   id: unwanted-patterns-pytest-xfail
-        name: Check for use of pytest.xfail
-        entry: pytest\.xfail
-        language: pygrep
-        types: [python]
-        files: ^pandas/tests/
     -   id: unwanted-patterns-strings-to-concatenate
         name: Check for use of not concatenated strings
         language: python
@@ -204,14 +199,6 @@ repos:
         language: python
         entry: python scripts/validate_unwanted_patterns.py --validation-type="strings_with_wrong_placed_whitespace"
         types_or: [python, cython]
-    -   id: unwanted-typing
-        name: Check for outdated annotation syntax and missing error codes
-        entry: |
-            (?x)
-            \#\ type:\ (?!ignore)
-            |\#\ type:\s?ignore(?!\[)
-        language: pygrep
-        types: [python]
     -   id: use-pd_array-in-core
         name: Import pandas.array as pd_array in core
         language: python
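
The consolidated unwanted-patterns hook uses pre-commit's pygrep language: the whole entry is a single verbose (?x) regex, and a file fails the hook if any alternative matches a line, so one pass now covers checks that previously lived in several separate hooks. The sketch below is illustrative only and not part of the commit; the UNWANTED name and the reduced set of alternatives are assumptions made for the example.

import re

# Minimal sketch of how a combined pygrep-style check behaves, using a subset
# of the alternatives from the new "unwanted-patterns" entry above.
UNWANTED = re.compile(
    r"""(?x)
    \#\ type:\ (?!ignore)        # outdated comment-style annotation
    |\#\ type:\s?ignore(?!\[)    # "type: ignore" without an error code
    |\.__class__                 # foo.__class__ instead of type(foo)
    |np\.bool[^_8]               # np.bool instead of np.bool_
    |np\.object[^_8]             # np.object instead of np.object_
    """
)

samples = [
    "x = np.bool(1)",                     # flagged
    "flag = np.bool_(True)",              # ok
    "kind = obj.__class__",               # flagged
    "kind = type(obj)",                   # ok
    "y = f()  # type: ignore",            # flagged: missing error code
    "y = f()  # type: ignore[arg-type]",  # ok
]
for line in samples:
    status = "flagged" if UNWANTED.search(line) else "ok"
    print(f"{status:7} {line}")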

pandas/core/groupby/ops.py  (+18, -12)

@@ -336,7 +336,7 @@ def get_iterator(
         """
         splitter = self._get_splitter(data, axis=axis)
         keys = self._get_group_keys()
-        for key, (i, group) in zip(keys, splitter):
+        for key, group in zip(keys, splitter):
             yield key, group.__finalize__(data, method="groupby")

     @final
@@ -411,21 +411,27 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
                 if len(result_values) == len(group_keys):
                     return group_keys, result_values, mutated

-        for key, (i, group) in zip(group_keys, splitter):
-            object.__setattr__(group, "name", key)
-
+        if result_values is None:
             # result_values is None if fast apply path wasn't taken
             # or fast apply aborted with an unexpected exception.
             # In either case, initialize the result list and perform
             # the slow iteration.
-            if result_values is None:
-                result_values = []
-
+            result_values = []
+            skip_first = False
+        else:
             # If result_values is not None we're in the case that the
             # fast apply loop was broken prematurely but we have
             # already the result for the first group which we can reuse.
-            elif i == 0:
-                continue
+            skip_first = True
+
+        # This calls DataSplitter.__iter__
+        zipped = zip(group_keys, splitter)
+        if skip_first:
+            # pop the first item from the front of the iterator
+            next(zipped)
+
+        for key, group in zipped:
+            object.__setattr__(group, "name", key)

             # group might be modified
             group_axes = group.axes
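
With the splitter no longer yielding an index, the rewritten apply tracks whether the first group's result can be reused in a skip_first flag and simply advances the zipped iterator once with next() before the slow loop, instead of testing i == 0 inside it. Below is a standalone sketch of that pattern on toy data; the collect name and the sample keys/groups are illustrative, not pandas code.

def collect(keys, groups, skip_first: bool):
    # Build the (key, group) iterator once, then drop its first element with
    # next() when the first result is already known, rather than checking an
    # index on every iteration.
    zipped = zip(keys, groups)
    if skip_first:
        next(zipped)  # pop the first item from the front of the iterator
    return list(zipped)

keys = ["a", "b", "c"]
groups = [[1, 2], [3], [4, 5]]

print(collect(keys, groups, skip_first=False))  # [('a', [1, 2]), ('b', [3]), ('c', [4, 5])]
print(collect(keys, groups, skip_first=True))   # [('b', [3]), ('c', [4, 5])]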
@@ -779,7 +785,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F):

         splitter = get_splitter(obj, group_index, ngroups, axis=0)

-        for label, group in splitter:
+        for label, group in enumerate(splitter):

             # Each step of this loop corresponds to
             # libreduction._BaseGrouper._apply_to_group
@@ -1012,8 +1018,8 @@ def __iter__(self):

         starts, ends = lib.generate_slices(self.slabels, self.ngroups)

-        for i, (start, end) in enumerate(zip(starts, ends)):
-            yield i, self._chop(sdata, slice(start, end))
+        for start, end in zip(starts, ends):
+            yield self._chop(sdata, slice(start, end))

     @cache_readonly
     def sorted_data(self) -> FrameOrSeries:
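
After this change DataSplitter.__iter__ yields only the chopped groups, so callers that still need a positional label wrap the splitter in enumerate(), as the _aggregate_series_pure_python hunk above does. A toy sketch of the same shape follows; the split helper and the sample data are illustrative, not the pandas implementation.

def split(values, slices):
    # Yield only the chopped pieces, mirroring the new __iter__ above.
    for start, end in slices:
        yield values[start:end]

values = list(range(10))
slices = [(0, 3), (3, 7), (7, 10)]

for label, group in enumerate(split(values, slices)):
    print(label, group)
# 0 [0, 1, 2]
# 1 [3, 4, 5, 6]
# 2 [7, 8, 9]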
