Blacken the code base #27076

Merged
3 commits, merged Jul 4, 2019
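For context before the hunks: this PR runs the Black formatter over the benchmark suite, so the changes shown below are purely cosmetic rewrites (double quotes, spaces around the power operator, multi-line dict and call literals with trailing commas) with no behavior change. As a hedged illustration only, not code taken from the PR itself, the recurring before/after pattern looks like this:

import numpy as np

N = 10 ** 5  # the power-operator spacing Black applied in these files

# hand-wrapped, single-quoted style used before this PR
data_before = {'int': np.arange(N),
               'float': np.random.randn(N)}

# Black's output: double quotes, one key per line, trailing comma
data_after = {
    "int": np.arange(N),
    "float": np.random.randn(N),
}

assert list(data_before) == list(data_after)  # same keys either way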
123 changes: 69 additions & 54 deletions asv_bench/benchmarks/algorithms.py
@@ -5,7 +5,7 @@
import pandas as pd
from pandas.util import testing as tm

for imp in ['pandas.util', 'pandas.tools.hashing']:
for imp in ["pandas.util", "pandas.tools.hashing"]:
try:
hashing = import_module(imp)
break
@@ -15,15 +15,17 @@

class Factorize:

params = [[True, False], ['int', 'uint', 'float', 'string']]
param_names = ['sort', 'dtype']
params = [[True, False], ["int", "uint", "float", "string"]]
param_names = ["sort", "dtype"]

def setup(self, sort, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N).repeat(5)),
'uint': pd.UInt64Index(np.arange(N).repeat(5)),
'float': pd.Float64Index(np.random.randn(N).repeat(5)),
'string': tm.makeStringIndex(N).repeat(5)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N).repeat(5)),
"uint": pd.UInt64Index(np.arange(N).repeat(5)),
"float": pd.Float64Index(np.random.randn(N).repeat(5)),
"string": tm.makeStringIndex(N).repeat(5),
}
self.idx = data[dtype]

def time_factorize(self, sort, dtype):
@@ -32,15 +34,17 @@ def time_factorize(self, sort, dtype):

class FactorizeUnique:

params = [[True, False], ['int', 'uint', 'float', 'string']]
param_names = ['sort', 'dtype']
params = [[True, False], ["int", "uint", "float", "string"]]
param_names = ["sort", "dtype"]

def setup(self, sort, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N)),
'uint': pd.UInt64Index(np.arange(N)),
'float': pd.Float64Index(np.arange(N)),
'string': tm.makeStringIndex(N)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N)),
"uint": pd.UInt64Index(np.arange(N)),
"float": pd.Float64Index(np.arange(N)),
"string": tm.makeStringIndex(N),
}
self.idx = data[dtype]
assert self.idx.is_unique

@@ -50,15 +54,17 @@ def time_factorize(self, sort, dtype):

class Duplicated:

params = [['first', 'last', False], ['int', 'uint', 'float', 'string']]
param_names = ['keep', 'dtype']
params = [["first", "last", False], ["int", "uint", "float", "string"]]
param_names = ["keep", "dtype"]

def setup(self, keep, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N).repeat(5)),
'uint': pd.UInt64Index(np.arange(N).repeat(5)),
'float': pd.Float64Index(np.random.randn(N).repeat(5)),
'string': tm.makeStringIndex(N).repeat(5)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N).repeat(5)),
"uint": pd.UInt64Index(np.arange(N).repeat(5)),
"float": pd.Float64Index(np.random.randn(N).repeat(5)),
"string": tm.makeStringIndex(N).repeat(5),
}
self.idx = data[dtype]
# cache is_unique
self.idx.is_unique
@@ -69,15 +75,17 @@ def time_duplicated(self, keep, dtype):

class DuplicatedUniqueIndex:

params = ['int', 'uint', 'float', 'string']
param_names = ['dtype']
params = ["int", "uint", "float", "string"]
param_names = ["dtype"]

def setup(self, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N)),
'uint': pd.UInt64Index(np.arange(N)),
'float': pd.Float64Index(np.random.randn(N)),
'string': tm.makeStringIndex(N)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N)),
"uint": pd.UInt64Index(np.arange(N)),
"float": pd.Float64Index(np.random.randn(N)),
"string": tm.makeStringIndex(N),
}
self.idx = data[dtype]
# cache is_unique
self.idx.is_unique
@@ -87,67 +95,74 @@ def time_duplicated_unique(self, dtype):


class Hashing:

def setup_cache(self):
N = 10**5
N = 10 ** 5

df = pd.DataFrame(
{'strings': pd.Series(tm.makeStringIndex(10000).take(
np.random.randint(0, 10000, size=N))),
'floats': np.random.randn(N),
'ints': np.arange(N),
'dates': pd.date_range('20110101', freq='s', periods=N),
'timedeltas': pd.timedelta_range('1 day', freq='s', periods=N)})
df['categories'] = df['strings'].astype('category')
{
"strings": pd.Series(
tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=N))
),
"floats": np.random.randn(N),
"ints": np.arange(N),
"dates": pd.date_range("20110101", freq="s", periods=N),
"timedeltas": pd.timedelta_range("1 day", freq="s", periods=N),
}
)
df["categories"] = df["strings"].astype("category")
df.iloc[10:20] = np.nan
return df

def time_frame(self, df):
hashing.hash_pandas_object(df)

def time_series_int(self, df):
hashing.hash_pandas_object(df['ints'])
hashing.hash_pandas_object(df["ints"])

def time_series_string(self, df):
hashing.hash_pandas_object(df['strings'])
hashing.hash_pandas_object(df["strings"])

def time_series_float(self, df):
hashing.hash_pandas_object(df['floats'])
hashing.hash_pandas_object(df["floats"])

def time_series_categorical(self, df):
hashing.hash_pandas_object(df['categories'])
hashing.hash_pandas_object(df["categories"])

def time_series_timedeltas(self, df):
hashing.hash_pandas_object(df['timedeltas'])
hashing.hash_pandas_object(df["timedeltas"])

def time_series_dates(self, df):
hashing.hash_pandas_object(df['dates'])
hashing.hash_pandas_object(df["dates"])


class Quantile:
params = [[0, 0.5, 1],
['linear', 'nearest', 'lower', 'higher', 'midpoint'],
['float', 'int', 'uint']]
param_names = ['quantile', 'interpolation', 'dtype']
params = [
[0, 0.5, 1],
["linear", "nearest", "lower", "higher", "midpoint"],
["float", "int", "uint"],
]
param_names = ["quantile", "interpolation", "dtype"]

def setup(self, quantile, interpolation, dtype):
N = 10**5
data = {'int': np.arange(N),
'uint': np.arange(N).astype(np.uint64),
'float': np.random.randn(N)}
N = 10 ** 5
data = {
"int": np.arange(N),
"uint": np.arange(N).astype(np.uint64),
"float": np.random.randn(N),
}
self.idx = pd.Series(data[dtype].repeat(5))

def time_quantile(self, quantile, interpolation, dtype):
self.idx.quantile(quantile, interpolation=interpolation)


class SortIntegerArray:
params = [10**3, 10**5]
params = [10 ** 3, 10 ** 5]

def setup(self, N):
data = np.arange(N, dtype=float)
data[40] = np.nan
self.array = pd.array(data, dtype='Int64')
self.array = pd.array(data, dtype="Int64")

def time_argsort(self, N):
self.array.argsort()
6 changes: 2 additions & 4 deletions asv_bench/benchmarks/attrs_caching.py
@@ -1,13 +1,13 @@
import numpy as np
from pandas import DataFrame

try:
from pandas.util import cache_readonly
except ImportError:
from pandas.util.decorators import cache_readonly


class DataFrameAttributes:

def setup(self):
self.df = DataFrame(np.random.randn(10, 6))
self.cur_index = self.df.index
@@ -20,14 +20,12 @@ def time_set_index(self):


class CacheReadonly:

def setup(self):

class Foo:

@cache_readonly
def prop(self):
return 5

self.obj = Foo()

def time_cache_readonly(self):
51 changes: 27 additions & 24 deletions asv_bench/benchmarks/binary_ops.py
@@ -1,6 +1,7 @@
import numpy as np
from pandas import DataFrame, Series, date_range
from pandas.core.algorithms import checked_add_with_arr

try:
import pandas.core.computation.expressions as expr
except ImportError:
@@ -9,14 +10,14 @@

class Ops:

params = [[True, False], ['default', 1]]
param_names = ['use_numexpr', 'threads']
params = [[True, False], ["default", 1]]
param_names = ["use_numexpr", "threads"]

def setup(self, use_numexpr, threads):
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))

if threads != 'default':
if threads != "default":
expr.set_numexpr_threads(threads)
if not use_numexpr:
expr.set_use_numexpr(False)
@@ -39,18 +40,21 @@ def teardown(self, use_numexpr, threads):


class Ops2:

def setup(self):
N = 10**3
N = 10 ** 3
self.df = DataFrame(np.random.randn(N, N))
self.df2 = DataFrame(np.random.randn(N, N))

self.df_int = DataFrame(np.random.randint(np.iinfo(np.int16).min,
np.iinfo(np.int16).max,
size=(N, N)))
self.df2_int = DataFrame(np.random.randint(np.iinfo(np.int16).min,
np.iinfo(np.int16).max,
size=(N, N)))
self.df_int = DataFrame(
np.random.randint(
np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(N, N)
)
)
self.df2_int = DataFrame(
np.random.randint(
np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(N, N)
)
)

self.s = Series(np.random.randn(N))

@@ -90,16 +94,16 @@ def time_frame_series_dot(self):

class Timeseries:

params = [None, 'US/Eastern']
param_names = ['tz']
params = [None, "US/Eastern"]
param_names = ["tz"]

def setup(self, tz):
N = 10**6
N = 10 ** 6
halfway = (N // 2) - 1
self.s = Series(date_range('20010101', periods=N, freq='T', tz=tz))
self.s = Series(date_range("20010101", periods=N, freq="T", tz=tz))
self.ts = self.s[halfway]

self.s2 = Series(date_range('20010101', periods=N, freq='s', tz=tz))
self.s2 = Series(date_range("20010101", periods=N, freq="s", tz=tz))

def time_series_timestamp_compare(self, tz):
self.s <= self.ts
@@ -117,20 +121,19 @@ def time_timestamp_ops_diff_with_shift(self, tz):
class AddOverflowScalar:

params = [1, -1, 0]
param_names = ['scalar']
param_names = ["scalar"]

def setup(self, scalar):
N = 10**6
N = 10 ** 6
self.arr = np.arange(N)

def time_add_overflow_scalar(self, scalar):
checked_add_with_arr(self.arr, scalar)


class AddOverflowArray:

def setup(self):
N = 10**6
N = 10 ** 6
self.arr = np.arange(N)
self.arr_rev = np.arange(-N, 0)
self.arr_mixed = np.array([1, -1]).repeat(N / 2)
@@ -144,12 +147,12 @@ def time_add_overflow_arr_mask_nan(self):
checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1)

def time_add_overflow_b_mask_nan(self):
checked_add_with_arr(self.arr, self.arr_mixed,
b_mask=self.arr_nan_1)
checked_add_with_arr(self.arr, self.arr_mixed, b_mask=self.arr_nan_1)

def time_add_overflow_both_arg_nan(self):
checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1,
b_mask=self.arr_nan_2)
checked_add_with_arr(
self.arr, self.arr_mixed, arr_mask=self.arr_nan_1, b_mask=self.arr_nan_2
)


from .pandas_vb_common import setup # noqa: F401