simonjayhawkins
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml (+1)
diff --git a/.github/workflows/database.yml b/.github/workflows/database.yml (+3)
diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml (+3)
diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml (+2)
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml (+1 -1)
diff --git a/MANIFEST.in b/MANIFEST.in (+13 -2)
diff --git a/README.md b/README.md (+4 -4)
diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py (+10 -13)
diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py (+3 -13)
diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py (-1)
diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py (+5 -3)
diff --git a/asv_bench/benchmarks/hash_functions.py b/asv_bench/benchmarks/hash_functions.py (-1)
diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py (-3)
diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py (-1)
diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py (+1 -3)
diff --git a/azure-pipelines.yml b/azure-pipelines.yml (+9 -2)
diff --git a/ci/code_checks.sh b/ci/code_checks.sh (+4)
diff --git a/ci/deps/azure-macos-37.yaml b/ci/deps/azure-macos-37.yaml (+1 -1)
diff --git a/doc/source/_static/ci.png b/doc/source/_static/ci.png (508 KB)
@@ -7,6 +7,7 @@ on:
     branches:
       - master
       - 1.2.x
+      - 1.3.x
 
 env:
   ENV_FILE: environment.yml
 
@@ -7,6 +7,9 @@ on:
     branches:
       - master
       - 1.2.x
+      - 1.3.x
+    paths-ignore:
+      - "doc/**"
 
 env:
   PYTEST_WORKERS: "auto"
 
@@ -7,6 +7,9 @@ on:
     branches:
       - master
       - 1.2.x
+      - 1.3.x
+    paths-ignore:
+      - "doc/**"
 
 env:
   PYTEST_WORKERS: "auto"
 
@@ -7,6 +7,8 @@ on:
   pull_request:
     branches:
       - master
+    paths-ignore:
+      - "doc/**"
 
 jobs:
   build:
 
@@ -9,7 +9,7 @@ repos:
     -   id: absolufy-imports
         files: ^pandas/
 -   repo: https://github.com/python/black
-    rev: 20.8b1
+    rev: 21.5b2
     hooks:
     -   id: black
 -   repo: https://github.com/codespell-project/codespell
 
@@ -17,18 +17,19 @@ global-exclude *.h5
 global-exclude *.html
 global-exclude *.json
 global-exclude *.jsonl
+global-exclude *.msgpack
 global-exclude *.pdf
 global-exclude *.pickle
 global-exclude *.png
 global-exclude *.pptx
-global-exclude *.pyc
-global-exclude *.pyd
 global-exclude *.ods
 global-exclude *.odt
+global-exclude *.orc
 global-exclude *.sas7bdat
 global-exclude *.sav
 global-exclude *.so
 global-exclude *.xls
+global-exclude *.xlsb
 global-exclude *.xlsm
 global-exclude *.xlsx
 global-exclude *.xpt
@@ -39,6 +40,13 @@ global-exclude .DS_Store
 global-exclude .git*
 global-exclude \#*
 
+global-exclude *.c
+global-exclude *.cpp
+global-exclude *.h
+
+global-exclude *.py[ocd]
+global-exclude *.pxi
+
 # GH 39321
 # csv_dir_path fixture checks the existence of the directory
 # exclude the whole directory to avoid running related tests in sdist
@@ -47,3 +55,6 @@ prune pandas/tests/io/parser/data
 include versioneer.py
 include pandas/_version.py
 include pandas/io/formats/templates/*.tpl
+
+graft pandas/_libs/src
+graft pandas/_libs/tslibs/src
@@ -10,13 +10,13 @@
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3509134.svg)](https://doi.org/10.5281/zenodo.3509134)
 [![Package Status](https://img.shields.io/pypi/status/pandas.svg)](https://pypi.org/project/pandas/)
 [![License](https://img.shields.io/pypi/l/pandas.svg)](https://github.com/pandas-dev/pandas/blob/master/LICENSE)
-[![Travis Build Status](https://travis-ci.org/pandas-dev/pandas.svg?branch=master)](https://travis-ci.org/pandas-dev/pandas)
 [![Azure Build Status](https://dev.azure.com/pandas-dev/pandas/_apis/build/status/pandas-dev.pandas?branch=master)](https://dev.azure.com/pandas-dev/pandas/_build/latest?definitionId=1&branch=master)
 [![Coverage](https://codecov.io/github/pandas-dev/pandas/coverage.svg?branch=master)](https://codecov.io/gh/pandas-dev/pandas)
 [![Downloads](https://anaconda.org/conda-forge/pandas/badges/downloads.svg)](https://pandas.pydata.org)
 [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas)
 [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
+[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/)
 
 ## What is it?
 
@@ -101,8 +101,8 @@ pip install pandas
 
 ## Dependencies
 - [NumPy - Adds support for large, multi-dimensional arrays, matrices and high-level mathematical functions to operate on these arrays](https://www.numpy.org)
-- [python-dateutil - Provides powerful extensions to the standard datetime module](https://labix.org/python-dateutil)
-- [pytz - Brings the Olson tz database into Python which allows accurate and cross platform timezone calculations](https://pythonhosted.org/pytz)
+- [python-dateutil - Provides powerful extensions to the standard datetime module](https://dateutil.readthedocs.io/en/stable/index.html)
+- [pytz - Brings the Olson tz database into Python which allows accurate and cross platform timezone calculations](https://github.com/stub42/pytz)
 
 See the [full installation instructions](https://pandas.pydata.org/pandas-docs/stable/install.html#dependencies) for minimum supported versions of required, recommended and optional dependencies.
 
@@ -121,7 +121,7 @@ cloning the git repo), execute:
 python setup.py install
 ```
 
-or for installing in [development mode](https://pip.pypa.io/en/latest/reference/pip_install.html#editable-installs):
+or for installing in [development mode](https://pip.pypa.io/en/latest/cli/pip_install/#install-editable):
 
 
 ```sh
 
@@ -23,41 +23,38 @@ class Factorize:
             "int",
             "uint",
             "float",
-            "string",
+            "object",
             "datetime64[ns]",
             "datetime64[ns, tz]",
             "Int64",
             "boolean",
-            "string_arrow",
+            "string[pyarrow]",
         ],
     ]
     param_names = ["unique", "sort", "dtype"]
 
     def setup(self, unique, sort, dtype):
         N = 10 ** 5
         string_index = tm.makeStringIndex(N)
-        try:
-            from pandas.core.arrays.string_arrow import ArrowStringDtype
-
-            string_arrow = pd.array(string_index, dtype=ArrowStringDtype())
-        except ImportError:
-            string_arrow = None
-
-        if dtype == "string_arrow" and not string_arrow:
-            raise NotImplementedError
+        string_arrow = None
+        if dtype == "string[pyarrow]":
+            try:
+                string_arrow = pd.array(string_index, dtype="string[pyarrow]")
+            except ImportError:
+                raise NotImplementedError
 
         data = {
             "int": pd.Int64Index(np.arange(N)),
             "uint": pd.UInt64Index(np.arange(N)),
             "float": pd.Float64Index(np.random.randn(N)),
-            "string": string_index,
+            "object": string_index,
             "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
             "datetime64[ns, tz]": pd.date_range(
                 "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
             ),
             "Int64": pd.array(np.arange(N), dtype="Int64"),
             "boolean": pd.array(np.random.randint(0, 2, N), dtype="boolean"),
-            "string_arrow": string_arrow,
+            "string[pyarrow]": string_arrow,
         }[dtype]
         if not unique:
             data = data.repeat(5)
 
@@ -25,8 +25,8 @@ class IsIn:
         "category[object]",
         "category[int]",
         "str",
-        "string",
-        "arrow_string",
+        "string[python]",
+        "string[pyarrow]",
     ]
     param_names = ["dtype"]
 
@@ -50,8 +50,6 @@ def setup(self, dtype):
 
         elif dtype in ["category[object]", "category[int]"]:
             # Note: sizes are different in this case than others
-            np.random.seed(1234)
-
             n = 5 * 10 ** 5
             sample_size = 100
 
@@ -62,9 +60,7 @@ def setup(self, dtype):
             self.values = np.random.choice(arr, sample_size)
             self.series = Series(arr).astype("category")
 
-        elif dtype in ["str", "string", "arrow_string"]:
-            from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
-
+        elif dtype in ["str", "string[python]", "string[pyarrow]"]:
             try:
                 self.series = Series(tm.makeStringIndex(N), dtype=dtype)
             except ImportError:
@@ -101,7 +97,6 @@ class IsinAlmostFullWithRandomInt:
     def setup(self, dtype, exponent, title):
         M = 3 * 2 ** (exponent - 2)
         # 0.77-the maximal share of occupied buckets
-        np.random.seed(42)
         self.series = Series(np.random.randint(0, M, M)).astype(dtype)
 
         values = np.random.randint(0, M, M).astype(dtype)
@@ -134,7 +129,6 @@ class IsinWithRandomFloat:
     param_names = ["dtype", "size", "title"]
 
     def setup(self, dtype, size, title):
-        np.random.seed(42)
         self.values = np.random.rand(size)
         self.series = Series(self.values).astype(dtype)
         np.random.shuffle(self.values)
@@ -181,7 +175,6 @@ class IsinWithArange:
 
     def setup(self, dtype, M, offset_factor):
         offset = int(M * offset_factor)
-        np.random.seed(42)
         tmp = Series(np.random.randint(offset, M + offset, 10 ** 6))
         self.series = tmp.astype(dtype)
         self.values = np.arange(M).astype(dtype)
@@ -292,10 +285,8 @@ def setup(self, dtype, MaxNumber, series_type):
             raise NotImplementedError
 
         if series_type == "random_hits":
-            np.random.seed(42)
             array = np.random.randint(0, MaxNumber, N)
         if series_type == "random_misses":
-            np.random.seed(42)
             array = np.random.randint(0, MaxNumber, N) + MaxNumber
         if series_type == "monotone_hits":
             array = np.repeat(np.arange(MaxNumber), N // MaxNumber)
@@ -324,7 +315,6 @@ def setup(self, dtype, series_type):
             raise NotImplementedError
 
         if series_type == "random":
-            np.random.seed(42)
             vals = np.random.randint(0, 10 * N, N)
         if series_type == "monotone":
             vals = np.arange(N)
 
@@ -67,7 +67,6 @@ class FromDictwithTimestamp:
 
     def setup(self, offset):
         N = 10 ** 3
-        np.random.seed(1234)
         idx = date_range(Timestamp("1/1/1900"), freq=offset, periods=N)
         df = DataFrame(np.random.randn(N, 10), index=idx)
         self.d = df.to_dict()
 
@@ -393,7 +393,7 @@ class GroupByMethods:
 
     param_names = ["dtype", "method", "application"]
     params = [
-        ["int", "float", "object", "datetime"],
+        ["int", "float", "object", "datetime", "uint"],
         [
             "all",
             "any",
@@ -442,6 +442,8 @@ def setup(self, dtype, method, application):
         values = rng.take(np.random.randint(0, ngroups, size=size))
         if dtype == "int":
             key = np.random.randint(0, size, size=size)
+        elif dtype == "uint":
+            key = np.random.randint(0, size, size=size, dtype=dtype)
         elif dtype == "float":
             key = np.concatenate(
                 [np.random.random(ngroups) * 0.1, np.random.random(ngroups) * 10.0]
@@ -505,11 +507,11 @@ def time_frame_agg(self, dtype, method):
         self.df.groupby("key").agg(method)
 
 
-class CumminMax:
+class Cumulative:
     param_names = ["dtype", "method"]
     params = [
         ["float64", "int64", "Float64", "Int64"],
-        ["cummin", "cummax"],
+        ["cummin", "cummax", "cumsum"],
     ]
 
     def setup(self, dtype, method):
 
@@ -67,7 +67,6 @@ class NumericSeriesIndexingShuffled:
 
     def setup(self, index, N):
         vals = np.array(list(range(55)) + [54] + list(range(55, N - 1)))
-        np.random.seed(42)
         np.random.shuffle(vals)
         indices = index(vals)
         self.data = pd.Series(np.arange(N), index=indices)
 
@@ -368,17 +368,14 @@ def setup(self):
         self.df = DataFrame(index=range(self.N))
 
     def time_insert(self):
-        np.random.seed(1234)
         for i in range(100):
             self.df.insert(0, i, np.random.randn(self.N), allow_duplicates=True)
 
     def time_assign_with_setitem(self):
-        np.random.seed(1234)
         for i in range(100):
             self.df[i] = np.random.randn(self.N)
 
     def time_assign_list_like_with_setitem(self):
-        np.random.seed(1234)
         self.df[list(range(100))] = np.random.randn(self.N, 100)
 
     def time_assign_list_of_columns_concat(self):
 
@@ -145,7 +145,6 @@ class Mode:
     param_names = ["N", "dtype"]
 
     def setup(self, N, dtype):
-        np.random.seed(42)
         self.s = Series(np.random.randint(0, N, size=10 * N)).astype(dtype)
 
     def time_mode(self, N, dtype):
 
@@ -12,12 +12,10 @@
 
 
 class Dtypes:
-    params = ["str", "string", "arrow_string"]
+    params = ["str", "string[python]", "string[pyarrow]"]
     param_names = ["dtype"]
 
     def setup(self, dtype):
-        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
-
         try:
             self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
         except ImportError:
 
@@ -1,11 +1,18 @@
 # Adapted from https://github.com/numba/numba/blob/master/azure-pipelines.yml
 trigger:
-- master
-- 1.2.x
+  branches:
+    include:
+    - master
+    - 1.2.x
+    - 1.3.x
+  paths:
+    exclude:
+    - 'doc/*'
 
 pr:
 - master
 - 1.2.x
+- 1.3.x
 
 variables:
   PYTEST_WORKERS: auto
 
@@ -77,6 +77,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Check for unnecessary random seeds in asv benchmarks' ; echo $MSG
+    invgrep -R --exclude pandas_vb_common.py -E 'np.random.seed' asv_bench/benchmarks/
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
 fi
 
 ### CODE ###
 
@@ -22,7 +22,7 @@ dependencies:
   - numexpr
   - numpy=1.17.3
   - openpyxl
-  - pyarrow=0.17.0
+  - pyarrow=0.17
   - pytables
   - python-dateutil==2.7.3
   - pytz