
Commit 7fbfb65

committed (2 parents: 4dac36a + 18ab444)

File tree

464 files changed: +20010 -14305 lines


.pre-commit-config.yaml (+1 -1)

@@ -3,7 +3,7 @@ repos:
     rev: 19.10b0
     hooks:
     - id: black
-      language_version: python3.7
+      language_version: python3
 -   repo: https://gitlab.com/pycqa/flake8
     rev: 3.7.7
     hooks:

.travis.yml (+21 -3)

@@ -14,6 +14,8 @@ cache:

 env:
   global:
+    # Variable for test workers
+    - PYTEST_WORKERS="auto"
     # create a github personal access token
     # cd pandas-dev/pandas
     # travis encrypt 'PANDAS_GH_TOKEN=personal_access_token' -r pandas-dev/pandas
@@ -27,12 +29,21 @@ matrix:
   fast_finish: true

   include:
+    # In allowed failures
+    - dist: bionic
+      python: 3.9-dev
+      env:
+        - JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
     - env:
         - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network and not clipboard)"

     - env:
         - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network and not clipboard)"

+    - arch: arm64
+      env:
+        - JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard)"
+
     - env:
         - JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network and not clipboard) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
       services:
@@ -53,11 +64,18 @@ matrix:
       services:
         - mysql
         - postgresql
+  allow_failures:
+    - arch: arm64
+      env:
+        - JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard)"
+    - dist: bionic
+      python: 3.9-dev
+      env:
+        - JOB="3.9-dev" PATTERN="(not slow and not network)"

 before_install:
   - echo "before_install"
-  # set non-blocking IO on travis
-  # https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024
+  # Use blocking IO on travis. Ref: https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024
   - python -c 'import os,sys,fcntl; flags = fcntl.fcntl(sys.stdout, fcntl.F_GETFL); fcntl.fcntl(sys.stdout, fcntl.F_SETFL, flags&~os.O_NONBLOCK);'
   - source ci/travis_process_gbq_encryption.sh
   - export PATH="$HOME/miniconda3/bin:$PATH"
@@ -83,7 +101,7 @@ install:
 script:
   - echo "script start"
   - echo "$JOB"
-  - source activate pandas-dev
+  - if [ "$JOB" != "3.9-dev" ]; then source activate pandas-dev; fi
   - ci/run_tests.sh

 after_script:

README.md (+4 -3)

@@ -16,10 +16,11 @@
 [![Downloads](https://anaconda.org/conda-forge/pandas/badges/downloads.svg)](https://pandas.pydata.org)
 [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas)
 [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org)
+[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)

 ## What is it?

-**pandas** is a Python package providing fast, flexible, and expressive data
+**pandas** is a Python package that provides fast, flexible, and expressive data
 structures designed to make working with "relational" or "labeled" data both
 easy and intuitive. It aims to be the fundamental high-level building block for
 doing practical, **real world** data analysis in Python. Additionally, it has
@@ -153,11 +154,11 @@ For usage questions, the best place to go to is [StackOverflow](https://stackove
 Further, general questions and discussions can also take place on the [pydata mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata).

 ## Discussion and Development
-Most development discussion is taking place on github in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.
+Most development discussions take place on github in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.

 ## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas)

-All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
+All contributions, bug reports, bug fixes, documentation improvements, enhancements, and ideas are welcome.

 A detailed overview on how to contribute can be found in the **[contributing guide](https://pandas.pydata.org/docs/dev/development/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.

asv_bench/benchmarks/algorithms.py (+14 -3)

@@ -34,7 +34,16 @@ class Factorize:
     params = [
         [True, False],
         [True, False],
-        ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"],
+        [
+            "int",
+            "uint",
+            "float",
+            "string",
+            "datetime64[ns]",
+            "datetime64[ns, tz]",
+            "Int64",
+            "boolean",
+        ],
     ]
     param_names = ["unique", "sort", "dtype"]

@@ -49,13 +58,15 @@ def setup(self, unique, sort, dtype):
             "datetime64[ns, tz]": pd.date_range(
                 "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
             ),
+            "Int64": pd.array(np.arange(N), dtype="Int64"),
+            "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
         }[dtype]
         if not unique:
             data = data.repeat(5)
-        self.idx = data
+        self.data = data

     def time_factorize(self, unique, sort, dtype):
-        self.idx.factorize(sort=sort)
+        pd.factorize(self.data, sort=sort)


 class Duplicated:
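
The two dtypes added above are pandas' nullable extension types. Not part of the commit: a minimal sketch of what the extended Factorize benchmark now exercises — pd.factorize over Int64 and boolean extension arrays (array sizes and contents here are illustrative).

```python
import numpy as np
import pandas as pd

# Nullable extension arrays, the two dtypes newly added to the benchmark matrix.
int64_arr = pd.array(np.arange(10), dtype="Int64")
bool_arr = pd.array(np.random.randint(0, 2, 10), dtype="boolean")

# pd.factorize returns integer codes plus the unique values;
# sort=True mirrors the benchmark's `sort` parameter.
codes, uniques = pd.factorize(int64_arr, sort=True)
print(codes[:5], uniques[:5])

codes, uniques = pd.factorize(bool_arr, sort=True)
print(codes[:5], uniques)
```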

asv_bench/benchmarks/arithmetic.py (+79 -1)

@@ -101,6 +101,59 @@ def time_frame_op_with_series_axis1(self, opname):
         getattr(operator, opname)(self.df, self.ser)


+class FrameWithFrameWide:
+    # Many-columns, mixed dtypes
+
+    params = [
+        [
+            # GH#32779 has discussion of which operators are included here
+            operator.add,
+            operator.floordiv,
+            operator.gt,
+        ]
+    ]
+    param_names = ["op"]
+
+    def setup(self, op):
+        # we choose dtypes so as to make the blocks
+        #  a) not perfectly match between right and left
+        #  b) appreciably bigger than single columns
+        n_cols = 2000
+        n_rows = 500
+
+        # construct dataframe with 2 blocks
+        arr1 = np.random.randn(n_rows, int(n_cols / 2)).astype("f8")
+        arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("f4")
+        df = pd.concat(
+            [pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True,
+        )
+        # should already be the case, but just to be sure
+        df._consolidate_inplace()
+
+        # TODO: GH#33198 the setting here shouldn't need two steps
+        arr1 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8")
+        arr2 = np.random.randn(n_rows, int(n_cols / 2)).astype("i8")
+        arr3 = np.random.randn(n_rows, int(n_cols / 4)).astype("f8")
+        df2 = pd.concat(
+            [pd.DataFrame(arr1), pd.DataFrame(arr2), pd.DataFrame(arr3)],
+            axis=1,
+            ignore_index=True,
+        )
+        # should already be the case, but just to be sure
+        df2._consolidate_inplace()
+
+        self.left = df
+        self.right = df2
+
+    def time_op_different_blocks(self, op):
+        # blocks (and dtypes) are not aligned
+        op(self.left, self.right)
+
+    def time_op_same_blocks(self, op):
+        # blocks (and dtypes) are aligned
+        op(self.left, self.left)
+
+
 class Ops:

     params = [[True, False], ["default", 1]]
@@ -413,7 +466,32 @@ def setup(self, offset):
         self.rng = rng

     def time_apply_index(self, offset):
-        offset.apply_index(self.rng)
+        self.rng + offset
+
+
+class BinaryOpsMultiIndex:
+    params = ["sub", "add", "mul", "div"]
+    param_names = ["func"]
+
+    def setup(self, func):
+        date_range = pd.date_range("20200101 00:00", "20200102 0:00", freq="S")
+        level_0_names = [str(i) for i in range(30)]
+
+        index = pd.MultiIndex.from_product([level_0_names, date_range])
+        column_names = ["col_1", "col_2"]
+
+        self.df = pd.DataFrame(
+            np.random.rand(len(index), 2), index=index, columns=column_names
+        )
+
+        self.arg_df = pd.DataFrame(
+            np.random.randint(1, 10, (len(level_0_names), 2)),
+            index=level_0_names,
+            columns=column_names,
+        )
+
+    def time_binary_op_multiindex(self, func):
+        getattr(self.df, func)(self.arg_df, level=0)


 from .pandas_vb_common import setup  # noqa: F401 isort:skip
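
Not part of the commit: a minimal sketch of the two patterns the updated arithmetic benchmarks time — adding a DateOffset directly to a DatetimeIndex (what time_apply_index now measures instead of calling offset.apply_index) and a DataFrame binary op aligned on the outer level of a MultiIndex, as in BinaryOpsMultiIndex. Sizes and labels below are illustrative.

```python
import numpy as np
import pandas as pd

# Vectorized offset addition on a DatetimeIndex.
rng = pd.date_range("2020-01-01", periods=5, freq="D")
shifted = rng + pd.offsets.MonthEnd()

# Binary op aligned on level 0 of a MultiIndex: each row of `arg` is
# broadcast across its matching outer-level group of `df`.
level_0 = ["a", "b", "c"]
idx = pd.MultiIndex.from_product([level_0, pd.date_range("2020-01-01", periods=4, freq="H")])
df = pd.DataFrame(np.random.rand(len(idx), 2), index=idx, columns=["col_1", "col_2"])
arg = pd.DataFrame(
    np.random.randint(1, 10, (len(level_0), 2)), index=level_0, columns=["col_1", "col_2"]
)
result = df.sub(arg, level=0)
print(shifted)
print(result.head())
```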

asv_bench/benchmarks/indexing.py (+2 -2)

@@ -158,9 +158,9 @@ def time_boolean_rows_boolean(self):
 class DataFrameNumericIndexing:
     def setup(self):
         self.idx_dupe = np.array(range(30)) * 99
-        self.df = DataFrame(np.random.randn(10000, 5))
+        self.df = DataFrame(np.random.randn(100000, 5))
         self.df_dup = concat([self.df, 2 * self.df, 3 * self.df])
-        self.bool_indexer = [True] * 5000 + [False] * 5000
+        self.bool_indexer = [True] * 50000 + [False] * 50000

     def time_iloc_dups(self):
         self.df_dup.iloc[self.idx_dupe]
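
Not part of the commit: a minimal sketch, at a much smaller size than the enlarged benchmark, of the two operations DataFrameNumericIndexing times — positional lookup into a frame whose index contains duplicate labels, and boolean row selection.

```python
import numpy as np
from pandas import DataFrame, concat

df = DataFrame(np.random.randn(1_000, 5))
df_dup = concat([df, 2 * df, 3 * df])   # index labels now repeat three times
idx_dupe = np.array(range(30)) * 9

rows = df_dup.iloc[idx_dupe]            # .iloc is purely positional, so duplicate labels are fine
mask = [True] * 500 + [False] * 500
subset = df[mask]                       # boolean row selection
print(rows.shape, subset.shape)
```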

asv_bench/benchmarks/io/json.py (+6)

@@ -53,12 +53,18 @@ def time_read_json_lines(self, index):
     def time_read_json_lines_concat(self, index):
         concat(read_json(self.fname, orient="records", lines=True, chunksize=25000))

+    def time_read_json_lines_nrows(self, index):
+        read_json(self.fname, orient="records", lines=True, nrows=25000)
+
     def peakmem_read_json_lines(self, index):
         read_json(self.fname, orient="records", lines=True)

     def peakmem_read_json_lines_concat(self, index):
         concat(read_json(self.fname, orient="records", lines=True, chunksize=25000))

+    def peakmem_read_json_lines_nrows(self, index):
+        read_json(self.fname, orient="records", lines=True, nrows=15000)
+

 class ToJSON(BaseIO):
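
Not part of the commit: a minimal sketch of the path the new *_nrows benchmarks cover — with lines=True, read_json can stop after the first nrows JSON lines instead of parsing the whole file. The file name below is hypothetical.

```python
import pandas as pd

# Write a modest line-delimited JSON file to read back.
pd.DataFrame({"a": range(100_000), "b": range(100_000)}).to_json(
    "example.jsonl", orient="records", lines=True
)

# Only the first 25000 lines are parsed.
head = pd.read_json("example.jsonl", orient="records", lines=True, nrows=25_000)
assert len(head) == 25_000
```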

asv_bench/benchmarks/rolling.py (+23)

@@ -186,4 +186,27 @@ def peakmem_rolling(self, constructor, window_size, dtype, method):
         getattr(self.roll, method)()


+class Groupby:
+
+    params = ["sum", "median", "mean", "max", "min", "kurt", "sum"]
+
+    def setup(self, method):
+        N = 1000
+        df = pd.DataFrame(
+            {
+                "A": [str(i) for i in range(N)] * 10,
+                "B": list(range(N)) * 10,
+                "C": pd.date_range(start="1900-01-01", freq="1min", periods=N * 10),
+            }
+        )
+        self.groupby_roll_int = df.groupby("A").rolling(window=2)
+        self.groupby_roll_offset = df.groupby("A").rolling(window="30s", on="C")
+
+    def time_rolling_int(self, method):
+        getattr(self.groupby_roll_int, method)()
+
+    def time_rolling_offset(self, method):
+        getattr(self.groupby_roll_offset, method)()
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
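
Not part of the commit: a minimal sketch of the groupby-rolling pattern the new Groupby benchmark exercises, with both a row-count window and a time-based window anchored on a datetime column (the data is illustrative).

```python
import pandas as pd

df = pd.DataFrame(
    {
        "A": ["x", "x", "x", "y", "y", "y"],
        "B": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        "C": pd.date_range("1900-01-01", freq="1min", periods=6),
    }
)

# Rolling over a fixed number of rows within each group.
by_rows = df.groupby("A")[["B"]].rolling(window=2).sum()

# Rolling over a 30-second window within each group, anchored on the datetime column "C".
by_time = df.groupby("A")[["B", "C"]].rolling(window="30s", on="C").sum()

print(by_rows)
print(by_time)
```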

azure-pipelines.yml (+3)

@@ -5,6 +5,9 @@ trigger:
 pr:
 - master

+variables:
+  PYTEST_WORKERS: auto
+
 jobs:
 # Mac and Linux use the same template
 - template: ci/azure/posix.yml

ci/azure/windows.yml (+1 -1)

@@ -13,7 +13,7 @@ jobs:
         CONDA_PY: "36"
         PATTERN: "not slow and not network"

-      py37_np141:
+      py37_np18:
         ENV_FILE: ci/deps/azure-windows-37.yaml
         CONDA_PY: "37"
         PATTERN: "not slow and not network"

ci/build39.sh (new file, +21)

@@ -0,0 +1,21 @@
+#!/bin/bash -e
+# Special build for python3.9 until numpy puts its own wheels up
+
+sudo apt-get install build-essential gcc xvfb
+pip install --no-deps -U pip wheel setuptools
+pip install python-dateutil pytz pytest pytest-xdist hypothesis
+pip install cython --pre  # https://github.com/cython/cython/issues/3395
+
+git clone https://github.com/numpy/numpy
+cd numpy
+python setup.py build_ext --inplace
+python setup.py install
+cd ..
+rm -rf numpy
+
+python setup.py build_ext --inplace
+python -m pip install --no-build-isolation -e .
+
+python -c "import sys; print(sys.version_info)"
+python -c "import pandas as pd"
+python -c "import hypothesis"

ci/deps/azure-37-numpydev.yaml (+1 -1)

@@ -16,7 +16,7 @@ dependencies:
   - pip:
     - cython==0.29.16  # GH#34014
     - "git+git://github.com/dateutil/dateutil.git"
-    - "-f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com"
+    - "--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple"
     - "--pre"
     - "numpy"
     - "scipy"

ci/deps/azure-windows-37.yaml (+1 -1)

@@ -22,7 +22,7 @@ dependencies:
   - matplotlib=2.2.*
   - moto
   - numexpr
-  - numpy=1.14.*
+  - numpy=1.18.*
   - openpyxl
   - pyarrow=0.14
   - pytables

ci/deps/travis-36-locale.yaml (+1 -1)

@@ -27,7 +27,7 @@ dependencies:
   - numexpr
   - numpy
   - openpyxl
-  - pandas-gbq=0.8.0
+  - pandas-gbq=0.12.0
   - psycopg2=2.6.2
   - pymysql=0.7.11
   - pytables

ci/deps/travis-37-arm64.yaml (new file, +21)

@@ -0,0 +1,21 @@
+name: pandas-dev
+channels:
+  - defaults
+  - conda-forge
+dependencies:
+  - python=3.7.*
+
+  # tools
+  - cython>=0.29.13
+  - pytest>=5.0.1
+  - pytest-xdist>=1.21
+  - hypothesis>=3.58.0
+
+  # pandas dependencies
+  - botocore>=1.11
+  - numpy
+  - python-dateutil
+  - pytz
+  - pip
+  - pip:
+    - moto

ci/run_tests.sh (+1 -1)

@@ -20,7 +20,7 @@ if [[ $(uname) == "Linux" && -z $DISPLAY ]]; then
     XVFB="xvfb-run "
 fi

-PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n auto --dist=loadfile -s --strict --durations=10 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas"
+PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile -s --strict --durations=30 --junitxml=test-data.xml $TEST_ARGS $COVERAGE pandas"

 echo $PYTEST_CMD
 sh -c "$PYTEST_CMD"
