sthagen · sthagen · Mar 20, 2021 · Mar 19, 2021 · Mar 19, 2021 · Mar 19, 2021
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -141,6 +141,9 @@ jobs:
   data_manager:
     name: Test experimental data manager
     runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        pattern: ["not slow and not network and not clipboard", "slow"]
     steps:
 
     - name: Checkout
@@ -152,43 +155,11 @@ jobs:
     - name: Run tests
       env:
         PANDAS_DATA_MANAGER: array
+        PATTERN: ${{ matrix.pattern }}
+        PYTEST_WORKERS: "auto"
       run: |
         source activate pandas-dev
+        ci/run_tests.sh
 
-        pytest pandas/tests/frame/
-        pytest pandas/tests/reductions/
-        pytest pandas/tests/generic/test_generic.py
-        pytest pandas/tests/arithmetic/
-        pytest pandas/tests/groupby/
-        pytest pandas/tests/resample/
-        pytest pandas/tests/reshape/merge
-        pytest pandas/tests/series/
-        pytest pandas/tests/indexing/
-
-        pytest pandas/tests/test_*
-        pytest pandas/tests/api/
-        pytest pandas/tests/apply/
-        pytest pandas/tests/arrays/
-        pytest pandas/tests/base/
-        pytest pandas/tests/computation/
-        pytest pandas/tests/config/
-        pytest pandas/tests/dtypes/
-        pytest pandas/tests/extension/
-        pytest pandas/tests/generic/
-        pytest pandas/tests/indexes/
-        pytest pandas/tests/internals/
-        pytest pandas/tests/io/test_* -m "not slow and not clipboard"
-        pytest pandas/tests/io/excel/ -m "not slow and not clipboard"
-        pytest pandas/tests/io/formats/ -m "not slow and not clipboard"
-        pytest pandas/tests/io/parser/ -m "not slow and not clipboard"
-        pytest pandas/tests/io/sas/ -m "not slow and not clipboard"
-        pytest pandas/tests/io/xml/ -m "not slow and not clipboard"
-        pytest pandas/tests/libs/
-        pytest pandas/tests/plotting/
-        pytest pandas/tests/scalar/
-        pytest pandas/tests/strings/
-        pytest pandas/tests/tools/
-        pytest pandas/tests/tseries/
-        pytest pandas/tests/tslibs/
-        pytest pandas/tests/util/
-        pytest pandas/tests/window/
+    - name: Print skipped tests
+      run: python ci/print_skipped.py
diff --git a/.github/workflows/database.yml b/.github/workflows/database.yml
@@ -12,17 +12,19 @@ env:
   PYTEST_WORKERS: "auto"
   PANDAS_CI: 1
   PATTERN: ((not slow and not network and not clipboard) or (single and db))
+  COVERAGE: true
 
 jobs:
-  Linux_py37_locale:
+  Linux_py37_IO:
     runs-on: ubuntu-latest
     defaults:
       run:
         shell: bash -l {0}
 
-    env:
-      ENV_FILE: ci/deps/actions-37-locale.yaml
-      LOCALE_OVERRIDE: zh_CN.UTF-8
+    strategy:
+      matrix:
+        ENV_FILE: [ci/deps/actions-37-db-min.yaml, ci/deps/actions-37-db.yaml]
+      fail-fast: false
 
     services:
       mysql:
@@ -63,106 +65,20 @@ jobs:
       with:
         path: ~/conda_pkgs_dir
         key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
-          hashFiles('${{ env.ENV_FILE }}') }}
+          hashFiles(matrix.ENV_FILE) }}
 
     - uses: conda-incubator/setup-miniconda@v2
       with:
         activate-environment: pandas-dev
         channel-priority: strict
-        environment-file: ${{ env.ENV_FILE }}
+        environment-file: ${{ matrix.ENV_FILE }}
         use-only-tar-bz2: true
 
     - name: Build Pandas
       uses: ./.github/actions/build_pandas
 
     - name: Test
-      run: ci/run_tests.sh
-      if: always()
-
-    - name: Build Version
-      run: pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
-
-    - name: Publish test results
-      uses: actions/upload-artifact@master
-      with:
-        name: Test results
-        path: test-data.xml
-      if: failure()
-
-    - name: Print skipped tests
-      run: python ci/print_skipped.py
-
-    - name: Upload coverage to Codecov
-      uses: codecov/codecov-action@v1
-      with:
-        flags: unittests
-        name: codecov-pandas
-        fail_ci_if_error: false
-
-  Linux_py37_cov:
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        shell: bash -l {0}
-
-    env:
-      ENV_FILE: ci/deps/actions-37-cov.yaml
-      PANDAS_TESTING_MODE: deprecate
-      COVERAGE: true
-
-    services:
-      mysql:
-        image: mysql
-        env:
-          MYSQL_ALLOW_EMPTY_PASSWORD: yes
-          MYSQL_DATABASE: pandas
-        options: >-
-          --health-cmd "mysqladmin ping"
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-        ports:
-          - 3306:3306
-
-      postgres:
-        image: postgres
-        env:
-          POSTGRES_USER: postgres
-          POSTGRES_PASSWORD: postgres
-          POSTGRES_DB: pandas
-        options: >-
-          --health-cmd pg_isready
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-        ports:
-          - 5432:5432
-
-    steps:
-    - name: Checkout
-      uses: actions/checkout@v1
-
-    - name: Cache conda
-      uses: actions/cache@v1
-      env:
-        CACHE_NUMBER: 0
-      with:
-        path: ~/conda_pkgs_dir
-        key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
-          hashFiles('${{ env.ENV_FILE }}') }}
-
-    - uses: conda-incubator/setup-miniconda@v2
-      with:
-        activate-environment: pandas-dev
-        channel-priority: strict
-        environment-file: ${{ env.ENV_FILE }}
-        use-only-tar-bz2: true
-
-    - name: Build Pandas
-      uses: ./.github/actions/build_pandas
-
-    - name: Test
-      run: ci/run_tests.sh
+      run: pytest -m "${{ env.PATTERN }}" -n 2 --dist=loadfile -s --strict-markers --durations=30 --junitxml=test-data.xml --cov=pandas --cov-report=xml pandas/tests/io
       if: always()
 
     - name: Build Version

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -50,10 +50,6 @@ repos:
     rev: 5.7.0
     hooks:
     -   id: isort
--   repo: https://github.com/MarcoGorelli/no-string-hints
-    rev: v0.1.7
-    hooks:
-    -   id: no-string-hints
 -   repo: https://github.com/asottile/pyupgrade
     rev: v2.10.0
     hooks:
@@ -111,12 +107,6 @@ repos:
             pandas/tests/io/excel/test_writers\.py
             |pandas/tests/io/pytables/common\.py
             |pandas/tests/io/pytables/test_store\.py$
-    -   id: no-pandas-api-types
-        name: Check code for instances of pd.api.types
-        entry: (pd|pandas)\.api\.types\.
-        language: pygrep
-        types: [python]
-        files: ^pandas/tests/
     -   id: non-standard-imports
         name: Check for non-standard imports
         language: pygrep
@@ -128,6 +118,11 @@ repos:
 
             # Check for imports from collections.abc instead of `from collections import abc`
             |from\ collections\.abc\ import
+
+            # Numpy
+            |from\ numpy\ import\ random
+            |from\ numpy\.random\ import
+        types: [python]
     -   id: non-standard-imports-in-tests
         name: Check for non-standard imports in test suite
         language: pygrep
@@ -143,26 +138,17 @@ repos:
 
             # Check for use of pandas.testing instead of tm
             |pd\.testing\.
+
+            # Check for pd.api.types instead of from pandas.api.types import ...
+            |(pd|pandas)\.api\.types\.
         types: [python]
         files: ^pandas/tests/
-    -   id: non-standard-numpy-random-related-imports
-        name: Check for non-standard numpy.random-related imports excluding pandas/_testing.py
-        language: pygrep
-        exclude: pandas/_testing.py
+    -   id: np-bool-and-np-object
+        name: Check for use of np.bool/np.object instead of np.bool_/np.object_
         entry: |
             (?x)
-            # Check for imports from np.random.<method> instead of `from numpy import random` or `from numpy.random import <method>`
-            from\ numpy\ import\ random
-            |from\ numpy.random\ import
-        types: [python]
-    -   id: np-bool
-        name: Check for use of np.bool instead of np.bool_
-        entry: np\.bool[^_8]
-        language: pygrep
-        types_or: [python, cython, rst]
-    -   id: np-object
-        name: Check for use of np.object instead of np.object_
-        entry: np\.object[^_8]
+            np\.bool[^_8]
+            |np\.object[^_8]
         language: pygrep
         types_or: [python, cython, rst]
     -   id: pip-to-conda

diff --git a/ci/deps/actions-37-locale.yaml → ci/deps/actions-37-db-min.yaml b/ci/deps/actions-37-locale.yaml → ci/deps/actions-37-db-min.yaml
@@ -7,6 +7,7 @@ dependencies:
   # tools
   - cython>=0.29.21
   - pytest>=5.0.1
+  - pytest-cov
   - pytest-xdist>=1.21
   - hypothesis>=3.58.0
 

diff --git a/ci/deps/actions-37-cov.yaml → ci/deps/actions-37-db.yaml b/ci/deps/actions-37-cov.yaml → ci/deps/actions-37-db.yaml
diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx
@@ -199,20 +199,23 @@ def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
 
     Returns
     -------
-    tuple
-        1-d indexer ordered by groups, group counts.
+    ndarray[intp_t, ndim=1]
+        Indexer
+    ndarray[int64_t, ndim=1]
+        Group Counts
 
     Notes
     -----
     This is a reverse of the label factorization process.
     """
     cdef:
         Py_ssize_t i, loc, label, n
-        ndarray[int64_t] counts, where, result
+        ndarray[int64_t] counts, where
+        ndarray[intp_t] indexer
 
     counts = np.zeros(ngroups + 1, dtype=np.int64)
     n = len(index)
-    result = np.zeros(n, dtype=np.int64)
+    indexer = np.zeros(n, dtype=np.intp)
     where = np.zeros(ngroups + 1, dtype=np.int64)
 
     with nogil:
@@ -228,10 +231,10 @@ def groupsort_indexer(const int64_t[:] index, Py_ssize_t ngroups):
         # this is our indexer
         for i in range(n):
             label = index[i] + 1
-            result[where[label]] = i
+            indexer[where[label]] = i
             where[label] += 1
 
-    return result, counts
+    return indexer, counts
 
 
 @cython.boundscheck(False)

diff --git a/pandas/_libs/algos_take_helper.pxi.in b/pandas/_libs/algos_take_helper.pxi.in
@@ -66,7 +66,7 @@ def take_1d_{{name}}_{{dest}}(const {{c_type_in}}[:] values,
 {{else}}
 def take_1d_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=1] values,
 {{endif}}
-                              const int64_t[:] indexer,
+                              const intp_t[:] indexer,
                               {{c_type_out}}[:] out,
                               fill_value=np.nan):
 
@@ -102,7 +102,7 @@ def take_2d_axis0_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
 {{else}}
 def take_2d_axis0_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
 {{endif}}
-                                    ndarray[int64_t] indexer,
+                                    ndarray[intp_t] indexer,
                                     {{c_type_out}}[:, :] out,
                                     fill_value=np.nan):
     cdef:
@@ -156,7 +156,7 @@ def take_2d_axis1_{{name}}_{{dest}}(const {{c_type_in}}[:, :] values,
 {{else}}
 def take_2d_axis1_{{name}}_{{dest}}(ndarray[{{c_type_in}}, ndim=2] values,
 {{endif}}
-                                    ndarray[int64_t] indexer,
+                                    ndarray[intp_t] indexer,
                                     {{c_type_out}}[:, :] out,
                                     fill_value=np.nan):
 

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -19,6 +19,7 @@ from numpy cimport (
     int16_t,
     int32_t,
     int64_t,
+    intp_t,
     ndarray,
     uint8_t,
     uint16_t,
@@ -141,6 +142,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
         Py_ssize_t i, j, N, K, ngroups, size
         ndarray[int64_t] _counts
         ndarray[float64_t, ndim=2] data
+        ndarray[intp_t] indexer
         float64_t* ptr
 
     assert min_count == -1, "'min_count' only used in add and prod"