pandas-dev
diff --git a/‎.github/workflows/ci.yml
+20-23 b/‎.github/workflows/ci.yml
+20-23
diff --git a/‎LICENSES/HAVEN_LICENSE
+21-2 b/‎LICENSES/HAVEN_LICENSE
+21-2
diff --git a/‎asv_bench/benchmarks/algorithms.py
+35-56 b/‎asv_bench/benchmarks/algorithms.py
+35-56
diff --git a/‎asv_bench/benchmarks/arithmetic.py
+30 b/‎asv_bench/benchmarks/arithmetic.py
+30
diff --git a/‎asv_bench/benchmarks/categoricals.py
-3 b/‎asv_bench/benchmarks/categoricals.py
-3
diff --git a/‎asv_bench/benchmarks/frame_ctor.py
+45 b/‎asv_bench/benchmarks/frame_ctor.py
+45
diff --git a/‎asv_bench/benchmarks/index_cached_properties.py
+3 b/‎asv_bench/benchmarks/index_cached_properties.py
+3
diff --git a/‎asv_bench/benchmarks/index_object.py
-8 b/‎asv_bench/benchmarks/index_object.py
-8
diff --git a/‎asv_bench/benchmarks/indexing.py
+5 b/‎asv_bench/benchmarks/indexing.py
+5
diff --git a/‎asv_bench/benchmarks/period.py
-3 b/‎asv_bench/benchmarks/period.py
-3
diff --git a/‎asv_bench/benchmarks/rolling.py
+4-4 b/‎asv_bench/benchmarks/rolling.py
+4-4
diff --git a/‎asv_bench/benchmarks/sparse.py
-1 b/‎asv_bench/benchmarks/sparse.py
-1
@@ -125,35 +125,32 @@ jobs:
     - name: Check ipython directive errors
       run: "! grep -B1 \"^<<<-------------------------------------------------------------------------$\" sphinx.log"
 
-    - name: Merge website and docs
-      run: |
-        mkdir -p pandas_web/docs
-        cp -r web/build/* pandas_web/
-        cp -r doc/build/html/* pandas_web/docs/
-      if: github.event_name == 'push'
-
     - name: Install Rclone
       run: sudo apt install rclone -y
       if: github.event_name == 'push'
 
     - name: Set up Rclone
       run: |
-        RCLONE_CONFIG_PATH=$HOME/.config/rclone/rclone.conf
-        mkdir -p `dirname $RCLONE_CONFIG_PATH`
-        echo "[ovh_cloud_pandas_web]" > $RCLONE_CONFIG_PATH
-        echo "type = swift" >> $RCLONE_CONFIG_PATH
-        echo "env_auth = false" >> $RCLONE_CONFIG_PATH
-        echo "auth_version = 3" >> $RCLONE_CONFIG_PATH
-        echo "auth = https://auth.cloud.ovh.net/v3/" >> $RCLONE_CONFIG_PATH
-        echo "endpoint_type = public" >> $RCLONE_CONFIG_PATH
-        echo "tenant_domain = default" >> $RCLONE_CONFIG_PATH
-        echo "tenant = 2977553886518025" >> $RCLONE_CONFIG_PATH
-        echo "domain = default" >> $RCLONE_CONFIG_PATH
-        echo "user = w4KGs3pmDxpd" >> $RCLONE_CONFIG_PATH
-        echo "key = ${{ secrets.ovh_object_store_key }}" >> $RCLONE_CONFIG_PATH
-        echo "region = BHS" >> $RCLONE_CONFIG_PATH
+        CONF=$HOME/.config/rclone/rclone.conf
+        mkdir -p `dirname $CONF`
+        echo "[ovh_host]" > $CONF
+        echo "type = swift" >> $CONF
+        echo "env_auth = false" >> $CONF
+        echo "auth_version = 3" >> $CONF
+        echo "auth = https://auth.cloud.ovh.net/v3/" >> $CONF
+        echo "endpoint_type = public" >> $CONF
+        echo "tenant_domain = default" >> $CONF
+        echo "tenant = 2977553886518025" >> $CONF
+        echo "domain = default" >> $CONF
+        echo "user = w4KGs3pmDxpd" >> $CONF
+        echo "key = ${{ secrets.ovh_object_store_key }}" >> $CONF
+        echo "region = BHS" >> $CONF
+      if: github.event_name == 'push'
+
+    - name: Sync web with OVH
+      # The exclude pattern is quoted so the shell passes it to rclone verbatim
+      # instead of attempting pathname expansion on it.
+      run: rclone sync --exclude "pandas-docs/**" web/build ovh_host:prod
       if: github.event_name == 'push'
 
-    - name: Sync web
-      run: rclone sync pandas_web ovh_cloud_pandas_web:dev
+    - name: Sync dev docs with OVH
+      run: rclone sync doc/build/html ovh_host:prod/pandas-docs/dev
       if: github.event_name == 'push'
@@ -1,2 +1,21 @@
-YEAR: 2013-2016
-COPYRIGHT HOLDER: Hadley Wickham; RStudio; and Evan Miller
+# MIT License
+
+Copyright (c) 2019 Hadley Wickham; RStudio; and Evan Miller
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -31,83 +31,62 @@ def time_maybe_convert_objects(self):
 
 class Factorize:
+    """Time ``factorize`` on unique and repeated indexes of several dtypes."""
 
-    params = [[True, False], ["int", "uint", "float", "string"]]
-    param_names = ["sort", "dtype"]
-
-    def setup(self, sort, dtype):
-        N = 10 ** 5
-        data = {
-            "int": pd.Int64Index(np.arange(N).repeat(5)),
-            "uint": pd.UInt64Index(np.arange(N).repeat(5)),
-            "float": pd.Float64Index(np.random.randn(N).repeat(5)),
-            "string": tm.makeStringIndex(N).repeat(5),
-        }
-        self.idx = data[dtype]
-
-    def time_factorize(self, sort, dtype):
-        self.idx.factorize(sort=sort)
-
-
-class FactorizeUnique:
-
-    params = [[True, False], ["int", "uint", "float", "string"]]
-    param_names = ["sort", "dtype"]
+    params = [
+        [True, False],
+        [True, False],
+        ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"],
+    ]
+    param_names = ["unique", "sort", "dtype"]
 
-    def setup(self, sort, dtype):
+    def setup(self, unique, sort, dtype):
+        # Build one index of N elements for the requested dtype key.
         N = 10 ** 5
         data = {
             "int": pd.Int64Index(np.arange(N)),
             "uint": pd.UInt64Index(np.arange(N)),
-            "float": pd.Float64Index(np.arange(N)),
+            "float": pd.Float64Index(np.random.randn(N)),
             "string": tm.makeStringIndex(N),
-        }
-        self.idx = data[dtype]
-        assert self.idx.is_unique
-
-    def time_factorize(self, sort, dtype):
+            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
+            "datetime64[ns, tz]": pd.date_range(
+                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+            ),
+        }[dtype]
+        # Non-unique case: repeat every element five times.
+        if not unique:
+            data = data.repeat(5)
+        self.idx = data
+
+    def time_factorize(self, unique, sort, dtype):
+        # Only ``sort`` is forwarded; ``unique`` and ``dtype`` were consumed in setup.
         self.idx.factorize(sort=sort)
 
 
 class Duplicated:
+    """Time ``duplicated`` for each ``keep`` option on unique and repeated indexes."""
 
-    params = [["first", "last", False], ["int", "uint", "float", "string"]]
-    param_names = ["keep", "dtype"]
-
-    def setup(self, keep, dtype):
-        N = 10 ** 5
-        data = {
-            "int": pd.Int64Index(np.arange(N).repeat(5)),
-            "uint": pd.UInt64Index(np.arange(N).repeat(5)),
-            "float": pd.Float64Index(np.random.randn(N).repeat(5)),
-            "string": tm.makeStringIndex(N).repeat(5),
-        }
-        self.idx = data[dtype]
-        # cache is_unique
-        self.idx.is_unique
-
-    def time_duplicated(self, keep, dtype):
-        self.idx.duplicated(keep=keep)
-
-
-class DuplicatedUniqueIndex:
-
-    params = ["int", "uint", "float", "string"]
-    param_names = ["dtype"]
+    params = [
+        [True, False],
+        ["first", "last", False],
+        ["int", "uint", "float", "string", "datetime64[ns]", "datetime64[ns, tz]"],
+    ]
+    param_names = ["unique", "keep", "dtype"]
 
-    def setup(self, dtype):
+    def setup(self, unique, keep, dtype):
+        # Build one index of N elements for the requested dtype key.
         N = 10 ** 5
         data = {
             "int": pd.Int64Index(np.arange(N)),
             "uint": pd.UInt64Index(np.arange(N)),
             "float": pd.Float64Index(np.random.randn(N)),
             "string": tm.makeStringIndex(N),
-        }
-        self.idx = data[dtype]
+            "datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
+            "datetime64[ns, tz]": pd.date_range(
+                "2011-01-01", freq="H", periods=N, tz="Asia/Tokyo"
+            ),
+        }[dtype]
+        # Non-unique case: repeat every element five times.
+        if not unique:
+            data = data.repeat(5)
+        self.idx = data
         # cache is_unique
         self.idx.is_unique
 
-    def time_duplicated_unique(self, dtype):
-        self.idx.duplicated()
+    def time_duplicated(self, unique, keep, dtype):
+        # Only ``keep`` is forwarded; ``unique`` and ``dtype`` were consumed in setup.
+        self.idx.duplicated(keep=keep)
 
 
 class Hashing:
 
@@ -50,6 +50,36 @@ def time_frame_op_with_scalar(self, dtype, scalar, op):
         op(self.df, scalar)
 
 
+class MixedFrameWithSeriesAxis0:
+    """
+    Time DataFrame comparison and arithmetic methods called with a Series
+    operand and ``axis=0``, on a frame that holds both int and float columns.
+    """
+
+    # Each entry is the name of a DataFrame method looked up via getattr.
+    params = [
+        [
+            "eq",
+            "ne",
+            "lt",
+            "le",
+            "ge",
+            "gt",
+            "add",
+            "sub",
+            "div",
+            "floordiv",
+            "mul",
+            "pow",
+        ]
+    ]
+    param_names = ["opname"]
+
+    def setup(self, opname):
+        # 10 ** 6 consecutive integers laid out as 100 rows.
+        arr = np.arange(10 ** 6).reshape(100, -1)
+        df = DataFrame(arr)
+        # Adding a float column makes the frame mixed-dtype.
+        df["C"] = 1.0
+        self.df = df
+        # The frame's first column serves as the Series operand.
+        self.ser = df[0]
+
+    def time_frame_op_with_series_axis0(self, opname):
+        # Resolve the method by name and apply it along axis 0.
+        getattr(self.df, opname)(self.ser, axis=0)
+
+
 class Ops:
 
     params = [[True, False], ["default", 1]]
 
@@ -258,9 +258,6 @@ def setup(self):
     def time_get_loc(self):
         self.index.get_loc(self.category)
 
-    def time_shape(self):
-        self.index.shape
-
     def time_shallow_copy(self):
         self.index._shallow_copy()
 
 
@@ -1,5 +1,6 @@
 import numpy as np
 
+import pandas as pd
 from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range
 
 from .pandas_vb_common import tm
@@ -118,4 +119,48 @@ def time_frame_from_range(self):
         self.df = DataFrame(self.data)
 
 
+class FromArrays:
+    """
+    Time ``DataFrame._from_arrays`` when building a frame from a list of
+    per-column arrays: NumPy floats, sparse arrays and "Int64" arrays.
+    """
+
+    # NOTE(review): goal_time looks like a legacy asv attribute -- confirm that
+    # the asv version in use still honors it.
+    goal_time = 0.2
+
+    def setup(self):
+        # One array of N_rows values per column, N_cols columns for each kind.
+        N_rows = 1000
+        N_cols = 1000
+        self.float_arrays = [np.random.randn(N_rows) for _ in range(N_cols)]
+        # Random 0/1 integers wrapped in a SparseArray with dtype "float64".
+        self.sparse_arrays = [
+            pd.arrays.SparseArray(np.random.randint(0, 2, N_rows), dtype="float64")
+            for _ in range(N_cols)
+        ]
+        # Random integers below 1000 stored with the "Int64" dtype.
+        self.int_arrays = [
+            pd.array(np.random.randint(1000, size=N_rows), dtype="Int64")
+            for _ in range(N_cols)
+        ]
+        # Index and columns are built once here so the timed calls reuse them.
+        self.index = pd.Index(range(N_rows))
+        self.columns = pd.Index(range(N_cols))
+
+    def time_frame_from_arrays_float(self):
+        # Construct from the NumPy float arrays with verify_integrity=False.
+        self.df = DataFrame._from_arrays(
+            self.float_arrays,
+            index=self.index,
+            columns=self.columns,
+            verify_integrity=False,
+        )
+
+    def time_frame_from_arrays_int(self):
+        # Construct from the "Int64" arrays with verify_integrity=False.
+        self.df = DataFrame._from_arrays(
+            self.int_arrays,
+            index=self.index,
+            columns=self.columns,
+            verify_integrity=False,
+        )
+
+    def time_frame_from_arrays_sparse(self):
+        # Construct from the sparse arrays with verify_integrity=False.
+        self.df = DataFrame._from_arrays(
+            self.sparse_arrays,
+            index=self.index,
+            columns=self.columns,
+            verify_integrity=False,
+        )
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -7,6 +7,7 @@ class IndexCache:
 
     params = [
         [
+            "CategoricalIndex",
             "DatetimeIndex",
             "Float64Index",
             "IntervalIndex",
@@ -42,6 +43,8 @@ def setup(self, index_type):
             self.idx = pd.Float64Index(range(N))
         elif index_type == "UInt64Index":
             self.idx = pd.UInt64Index(range(N))
+        elif index_type == "CategoricalIndex":
+            self.idx = pd.CategoricalIndex(range(N), range(N))
         else:
             raise ValueError
         assert len(self.idx) == N
 
@@ -55,14 +55,6 @@ def time_datetime_difference_disjoint(self):
         self.datetime_left.difference(self.datetime_right)
 
 
-class Datetime:
-    def setup(self):
-        self.dr = date_range("20000101", freq="D", periods=10000)
-
-    def time_is_dates_only(self):
-        self.dr._is_dates_only
-
-
 class Range:
     def setup(self):
         self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3)
 
@@ -1,3 +1,8 @@
+"""
+These benchmarks are for Series and DataFrame indexing methods.  For the
+lower-level methods directly on Index and subclasses, see index_object.py,
+indexing_engines.py, and index_cached_properties.py.
+"""
 import warnings
 
 import numpy as np
 
@@ -85,9 +85,6 @@ def setup(self):
     def time_get_loc(self):
         self.index.get_loc(self.period)
 
-    def time_shape(self):
-        self.index.shape
-
     def time_shallow_copy(self):
         self.index._shallow_copy()
 
 
@@ -11,7 +11,7 @@ class Methods:
         ["int", "float"],
         ["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"],
     )
-    param_names = ["contructor", "window", "dtype", "method"]
+    param_names = ["constructor", "window", "dtype", "method"]
 
     def setup(self, constructor, window, dtype, method):
         N = 10 ** 5
@@ -72,7 +72,7 @@ class ExpandingMethods:
         ["int", "float"],
         ["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"],
     )
-    param_names = ["contructor", "window", "dtype", "method"]
+    # No "window" entry: setup() and time_expanding() each take only
+    # (constructor, dtype, method), so three names are needed here.
+    param_names = ["constructor", "dtype", "method"]
 
     def setup(self, constructor, dtype, method):
         N = 10 ** 5
@@ -86,7 +86,7 @@ def time_expanding(self, constructor, dtype, method):
 class EWMMethods:
 
     params = (["DataFrame", "Series"], [10, 1000], ["int", "float"], ["mean", "std"])
-    param_names = ["contructor", "window", "dtype", "method"]
+    param_names = ["constructor", "window", "dtype", "method"]
 
     def setup(self, constructor, window, dtype, method):
         N = 10 ** 5
@@ -104,7 +104,7 @@ class VariableWindowMethods(Methods):
         ["int", "float"],
         ["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"],
     )
-    param_names = ["contructor", "window", "dtype", "method"]
+    param_names = ["constructor", "window", "dtype", "method"]
 
     def setup(self, constructor, window, dtype, method):
         N = 10 ** 5
 
@@ -45,7 +45,6 @@ def time_sparse_array(self, dense_proportion, fill_value, dtype):
 class SparseDataFrameConstructor:
     def setup(self):
         N = 1000
-        self.arr = np.arange(N)
         self.sparse = scipy.sparse.rand(N, N, 0.005)
 
     def time_from_scipy(self):