lukemanley
diff --git a/.github/workflows/code-checks.yml
+24 b/.github/workflows/code-checks.yml
+24
diff --git a/.github/workflows/datamanger.yml
+2-1 b/.github/workflows/datamanger.yml
+2-1
diff --git a/.github/workflows/posix.yml
+48-26 b/.github/workflows/posix.yml
+48-26
diff --git a/.github/workflows/python-dev.yml
+1-1 b/.github/workflows/python-dev.yml
+1-1
diff --git a/.pre-commit-config.yaml
+3-3 b/.pre-commit-config.yaml
+3-3
diff --git a/Dockerfile
+2-2 b/Dockerfile
+2-2
diff --git a/README.md
+1-1 b/README.md
+1-1
diff --git a/asv_bench/benchmarks/algorithms.py
+5-5 b/asv_bench/benchmarks/algorithms.py
+5-5
diff --git a/asv_bench/benchmarks/algos/isin.py
+10-10 b/asv_bench/benchmarks/algos/isin.py
+10-10
@@ -156,3 +156,27 @@ jobs:
         name: Benchmarks log
         path: asv_bench/benchmarks.log
       if: failure()
+
+  build_docker_dev_environment:  # CI job: build the dev image from the checked-out repository
+    name: Build Docker Dev Environment
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        shell: bash -l {0}  # -l: run each step in a login shell
+
+    concurrency:  # push events get a per-run group; other events share one group per ref
+      # https://github.community/t/concurrecy-not-work-for-push/183068/7
+      group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-build_docker_dev_environment
+      cancel-in-progress: true  # a newer run in the same group cancels the one still running
+
+    steps:
+      - name: Clean up dangling images
+        run: docker image prune -f  # -f: skip the confirmation prompt
+
+      - name: Checkout
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0  # fetch full history instead of a shallow clone
+
+      - name: Build image
+        run: docker build --pull --no-cache --tag pandas-dev-env .  # re-pull the base image and rebuild every layer
@@ -20,6 +20,7 @@ jobs:
   data_manager:
     name: Test experimental data manager
     runs-on: ubuntu-latest
+    timeout-minutes: 120
     services:
       moto:
         image: motoserver/moto
@@ -45,7 +46,7 @@ jobs:
     - name: Run tests
       env:
         PANDAS_DATA_MANAGER: array
-        PATTERN: "not network and not clipboard"
+        PATTERN: "not network and not clipboard and not single_cpu"
         PYTEST_WORKERS: "auto"
         PYTEST_TARGET: pandas
       run: |
 
@@ -13,7 +13,6 @@ on:
       - "doc/**"
 
 env:
-  PYTEST_WORKERS: "auto"
   PANDAS_CI: 1
 
 jobs:
@@ -22,35 +21,54 @@ jobs:
     defaults:
       run:
         shell: bash -l {0}
+    timeout-minutes: 120
     strategy:
       matrix:
-        settings: [
-          [actions-38-downstream_compat.yaml, "not slow and not network and not clipboard", "", "", "", "", ""],
-          [actions-38-minimum_versions.yaml, "not clipboard", "", "", "", "", ""],
-          [actions-38.yaml, "not slow and not network", "language-pack-it xsel", "it_IT.utf8", "it_IT.utf8", "", ""],
-          [actions-38.yaml, "not slow and not network", "language-pack-zh-hans xsel", "zh_CN.utf8", "zh_CN.utf8", "", ""],
-          [actions-38.yaml, "not clipboard", "", "", "", "", ""],
-          [actions-pypy-38.yaml, "not slow and not clipboard", "", "", "", "", "--max-worker-restart 0"],
-          [actions-39.yaml, "not clipboard", "", "", "", "", ""],
-          [actions-310-numpydev.yaml, "not slow and not network", "xsel", "", "", "deprecate", "-W error"],
-          [actions-310.yaml, "not clipboard", "", "", "", "", ""],
-        ]
+        env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml]
+        pattern: ["not single_cpu", "single_cpu"]
+        # Don't test pyarrow v2/3: Causes timeouts in read_csv engine
+        # even if tests are skipped/xfailed
+        pyarrow_version: ["5", "6", "7"]
+        include:
+          - env_file: actions-38-downstream_compat.yaml
+            pattern: "not slow and not network and not single_cpu"
+            pytest_target: "pandas/tests/test_downstream.py"
+          - env_file: actions-38-minimum_versions.yaml
+            pattern: "not slow and not network and not single_cpu"
+          - env_file: actions-38.yaml
+            pattern: "not slow and not network and not single_cpu"
+            extra_apt: "language-pack-it"
+            lang: "it_IT.utf8"
+            lc_all: "it_IT.utf8"
+          - env_file: actions-38.yaml
+            pattern: "not slow and not network and not single_cpu"
+            extra_apt: "language-pack-zh-hans"
+            lang: "zh_CN.utf8"
+            lc_all: "zh_CN.utf8"
+          - env_file: actions-pypy-38.yaml
+            pattern: "not slow and not network and not single_cpu"
+            test_args: "--max-worker-restart 0"
+          - env_file: actions-310-numpydev.yaml
+            pattern: "not slow and not network and not single_cpu"
+            pandas_testing_mode: "deprecate"
+            test_args: "-W error"
       fail-fast: false
     env:
-      ENV_FILE: ci/deps/${{ matrix.settings[0] }}
-      PATTERN: ${{ matrix.settings[1] }}
-      EXTRA_APT: ${{ matrix.settings[2] }}
-      LANG: ${{ matrix.settings[3] }}
-      LC_ALL: ${{ matrix.settings[4] }}
-      PANDAS_TESTING_MODE: ${{ matrix.settings[5] }}
-      TEST_ARGS: ${{ matrix.settings[6] }}
-      PYTEST_TARGET:  pandas
-      IS_PYPY: ${{ contains(matrix.settings[0], 'pypy') }}
+      ENV_FILE: ci/deps/${{ matrix.env_file }}
+      PATTERN: ${{ matrix.pattern }}
+      EXTRA_APT: ${{ matrix.extra_apt || '' }}
+      LANG: ${{ matrix.lang || '' }}
+      LC_ALL: ${{ matrix.lc_all || '' }}
+      PANDAS_TESTING_MODE: ${{ matrix.pandas_testing_mode || '' }}
+      TEST_ARGS: ${{ matrix.test_args || '' }}
+      PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }}
+      PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}
+      IS_PYPY: ${{ contains(matrix.env_file, 'pypy') }}
       # TODO: re-enable coverage on pypy, its slow
-      COVERAGE: ${{ !contains(matrix.settings[0], 'pypy') }}
+      COVERAGE: ${{ !contains(matrix.env_file, 'pypy') }}
     concurrency:
       # https://github.community/t/concurrecy-not-work-for-push/183068/7
-      group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.settings[0] }}-${{ matrix.settings[1] }}-${{ matrix.settings[2] }}
+      group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.pyarrow_version || '' }}-${{ matrix.extra_apt || '' }}
       cancel-in-progress: true
 
     services:
@@ -105,7 +123,8 @@ jobs:
           hashFiles('${{ env.ENV_FILE }}') }}
 
     - name: Extra installs
-      run: sudo apt-get update && sudo apt-get install -y libc6-dev-i386 ${{ env.EXTRA_APT }}
+      # xsel for clipboard tests
+      run: sudo apt-get update && sudo apt-get install -y libc6-dev-i386 xsel ${{ env.EXTRA_APT }}
 
     - uses: conda-incubator/setup-miniconda@v2
       with:
@@ -117,6 +136,10 @@ jobs:
         use-only-tar-bz2: true
       if: ${{ env.IS_PYPY == 'false' }} # No pypy3.8 support
 
+    - name: Upgrade Arrow version  # pin pyarrow in the pandas-dev env to the matrix version
+      run: conda install -n pandas-dev -c conda-forge --no-update-deps pyarrow=${{ matrix.pyarrow_version }}
+      if: ${{ matrix.pyarrow_version }}  # skipped for matrix entries that do not set pyarrow_version
+
     - name: Setup PyPy
       uses: actions/setup-python@v2
       with:
@@ -127,8 +150,7 @@ jobs:
       shell: bash
       run: |
         # TODO: re-enable cov, its slowing the tests down though
-        # TODO: Unpin Cython, the new Cython 0.29.26 is causing compilation errors
-        pip install Cython==0.29.25 numpy python-dateutil pytz pytest>=6.0 pytest-xdist>=1.31.0 hypothesis>=5.5.3
+        pip install Cython numpy python-dateutil pytz "pytest>=6.0" "pytest-xdist>=1.31.0" "hypothesis>=5.5.3"  # quoted: a bare >= is a shell redirection
       if: ${{ env.IS_PYPY == 'true' }}
 
     - name: Build Pandas
 
@@ -23,7 +23,7 @@ on:
 env:
   PYTEST_WORKERS: "auto"
   PANDAS_CI: 1
-  PATTERN: "not slow and not network and not clipboard"
+  PATTERN: "not slow and not network and not clipboard and not single_cpu"
   COVERAGE: true
   PYTEST_TARGET: pandas
 
 
@@ -4,7 +4,7 @@ ci:
     autofix_prs: false
 repos:
 -   repo: https://github.com/MarcoGorelli/absolufy-imports
-    rev: v0.3.0
+    rev: v0.3.1
     hooks:
     -   id: absolufy-imports
         files: ^pandas/
@@ -16,7 +16,7 @@ repos:
         pass_filenames: true
         require_serial: false
 -   repo: https://github.com/python/black
-    rev: 21.12b0
+    rev: 22.1.0
     hooks:
     -   id: black
 -   repo: https://github.com/codespell-project/codespell
@@ -50,7 +50,7 @@ repos:
         - flake8==4.0.1
         - flake8-comprehensions==3.7.0
         - flake8-bugbear==21.3.2
-        - pandas-dev-flaker==0.2.0
+        - pandas-dev-flaker==0.4.0
 -   repo: https://github.com/PyCQA/isort
     rev: 5.10.1
     hooks:
 
@@ -1,4 +1,4 @@
-FROM quay.io/condaforge/miniforge3
+FROM quay.io/condaforge/miniforge3:4.11.0-0
 
 # if you forked pandas, you can pass in your own GitHub username to use your fork
 # i.e. gh_username=myname
@@ -45,4 +45,4 @@ RUN . /opt/conda/etc/profile.d/conda.sh \
     && cd "$pandas_home" \
     && export \
     && python setup.py build_ext -j 4 \
-    && python -m pip install -e .
+    && python -m pip install --no-build-isolation -e .
@@ -136,7 +136,7 @@ or alternatively
 python setup.py develop
 ```
 
-See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source).
+See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/getting_started/install.html#installing-from-source).
 
 ## License
 [BSD 3](LICENSE)
 
@@ -34,7 +34,7 @@ class Factorize:
     param_names = ["unique", "sort", "dtype"]
 
     def setup(self, unique, sort, dtype):
-        N = 10 ** 5
+        N = 10**5
         string_index = tm.makeStringIndex(N)
         string_arrow = None
         if dtype == "string[pyarrow]":
@@ -74,7 +74,7 @@ class Duplicated:
     param_names = ["unique", "keep", "dtype"]
 
     def setup(self, unique, keep, dtype):
-        N = 10 ** 5
+        N = 10**5
         data = {
             "int": pd.Index(np.arange(N), dtype="int64"),
             "uint": pd.Index(np.arange(N), dtype="uint64"),
@@ -97,7 +97,7 @@ def time_duplicated(self, unique, keep, dtype):
 
 class Hashing:
     def setup_cache(self):
-        N = 10 ** 5
+        N = 10**5
 
         df = pd.DataFrame(
             {
@@ -145,7 +145,7 @@ class Quantile:
     param_names = ["quantile", "interpolation", "dtype"]
 
     def setup(self, quantile, interpolation, dtype):
-        N = 10 ** 5
+        N = 10**5
         data = {
             "int": np.arange(N),
             "uint": np.arange(N).astype(np.uint64),
@@ -158,7 +158,7 @@ def time_quantile(self, quantile, interpolation, dtype):
 
 
 class SortIntegerArray:
-    params = [10 ** 3, 10 ** 5]
+    params = [10**3, 10**5]
 
     def setup(self, N):
         data = np.arange(N, dtype=float)
 
@@ -49,7 +49,7 @@ def setup(self, dtype):
 
         elif dtype in ["category[object]", "category[int]"]:
             # Note: sizes are different in this case than others
-            n = 5 * 10 ** 5
+            n = 5 * 10**5
             sample_size = 100
 
             arr = list(np.random.randint(0, n // 10, size=n))
@@ -174,7 +174,7 @@ class IsinWithArange:
 
     def setup(self, dtype, M, offset_factor):
         offset = int(M * offset_factor)
-        tmp = Series(np.random.randint(offset, M + offset, 10 ** 6))
+        tmp = Series(np.random.randint(offset, M + offset, 10**6))
         self.series = tmp.astype(dtype)
         self.values = np.arange(M).astype(dtype)
 
@@ -191,8 +191,8 @@ class IsInFloat64:
     param_names = ["dtype", "title"]
 
     def setup(self, dtype, title):
-        N_many = 10 ** 5
-        N_few = 10 ** 6
+        N_many = 10**5
+        N_few = 10**6
         self.series = Series([1, 2], dtype=dtype)
 
         if title == "many_different_values":
@@ -240,10 +240,10 @@ class IsInForObjects:
     param_names = ["series_type", "vals_type"]
 
     def setup(self, series_type, vals_type):
-        N_many = 10 ** 5
+        N_many = 10**5
 
         if series_type == "nans":
-            ser_vals = np.full(10 ** 4, np.nan)
+            ser_vals = np.full(10**4, np.nan)
         elif series_type == "short":
             ser_vals = np.arange(2)
         elif series_type == "long":
@@ -254,7 +254,7 @@ def setup(self, series_type, vals_type):
         self.series = Series(ser_vals).astype(object)
 
         if vals_type == "nans":
-            values = np.full(10 ** 4, np.nan)
+            values = np.full(10**4, np.nan)
         elif vals_type == "short":
             values = np.arange(2)
         elif vals_type == "long":
@@ -277,7 +277,7 @@ class IsInLongSeriesLookUpDominates:
     param_names = ["dtype", "MaxNumber", "series_type"]
 
     def setup(self, dtype, MaxNumber, series_type):
-        N = 10 ** 7
+        N = 10**7
 
         if series_type == "random_hits":
             array = np.random.randint(0, MaxNumber, N)
@@ -304,15 +304,15 @@ class IsInLongSeriesValuesDominate:
     param_names = ["dtype", "series_type"]
 
     def setup(self, dtype, series_type):
-        N = 10 ** 7
+        N = 10**7
 
         if series_type == "random":
             vals = np.random.randint(0, 10 * N, N)
         if series_type == "monotone":
             vals = np.arange(N)
 
         self.values = vals.astype(dtype.lower())
-        M = 10 ** 6 + 1
+        M = 10**6 + 1
         self.series = Series(np.arange(M)).astype(dtype)
 
     def time_isin(self, dtypes, series_type):