pandas-dev · mroeschke · Nov 27, 2018 · Nov 26, 2018 · Nov 26, 2018 · Nov 26, 2018
diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py
@@ -52,6 +52,8 @@ def setup(self):
                                                    np.iinfo(np.int16).max,
                                                    size=(N, N)))
 
+        self.s = Series(np.random.randn(N))
+
     # Division
 
     def time_frame_float_div(self):
@@ -74,6 +76,17 @@ def time_frame_int_mod(self):
     def time_frame_float_mod(self):
         self.df % self.df2
 
+    # Dot product
+
+    def time_frame_dot(self):  # dot product of self.df with self.df2
+        self.df.dot(self.df2)
+
+    def time_series_dot(self):  # dot product of self.s with itself
+        self.s.dot(self.s)
+
+    def time_frame_series_dot(self):  # dot product of self.df with self.s
+        self.df.dot(self.s)
+
 
 class Timeseries(object):
 

diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
@@ -69,6 +69,36 @@ def time_reindex_upcast(self):
         self.df2.reindex(np.random.permutation(range(1200)))
 
 
+class Rename(object):  # asv timings for DataFrame.rename
+
+    def setup(self):
+        N = 10**3
+        self.df = DataFrame(np.random.randn(N * 10, N))  # 10*N rows, N cols
+        self.idx = np.arange(4 * N, 7 * N)  # integers 4N .. 7N-1
+        self.dict_idx = {k: k for k in self.idx}  # maps each label to itself
+        self.df2 = DataFrame(  # NOTE(review): no time_* below reads df2
+            {c: {0: np.random.randint(0, 2, N).astype(np.bool_),
+                 1: np.random.randint(0, N, N).astype(np.int16),
+                 2: np.random.randint(0, N, N).astype(np.int32),
+                 3: np.random.randint(0, N, N).astype(np.int64)}
+                [np.random.randint(0, 4)] for c in range(N)})
+
+    def time_rename_single(self):
+        self.df.rename({0: 0})  # one-entry mapper
+
+    def time_rename_axis0(self):
+        self.df.rename(self.dict_idx)  # mapper passed positionally
+
+    def time_rename_axis1(self):
+        self.df.rename(columns=self.dict_idx)
+
+    def time_rename_both_axes(self):
+        self.df.rename(index=self.dict_idx, columns=self.dict_idx)
+
+    def time_dict_rename_both_axes(self):  # NOTE(review): same body as above
+        self.df.rename(index=self.dict_idx, columns=self.dict_idx)
+
+
 class Iteration(object):
 
     def setup(self):

diff --git a/asv_bench/benchmarks/plotting.py b/asv_bench/benchmarks/plotting.py
@@ -8,17 +8,48 @@
 matplotlib.use('Agg')
 
 
-class Plotting(object):
-
-    def setup(self):
-        self.s = Series(np.random.randn(1000000))
-        self.df = DataFrame({'col': self.s})
-
-    def time_series_plot(self):
-        self.s.plot()
-
-    def time_frame_plot(self):
-        self.df.plot()
+class SeriesPlotting(object):  # times Series.plot for each plot kind
+    params = [['line', 'bar', 'area', 'barh', 'hist', 'kde', 'pie']]
+    param_names = ['kind']
+
+    def setup(self, kind):
+        if kind in ['bar', 'barh', 'pie']:  # smallest input for these kinds
+            n = 100
+        elif kind in ['kde']:
+            n = 10000
+        else:  # remaining kinds: 'line', 'area', 'hist'
+            n = 1000000
+
+        self.s = Series(np.random.randn(n))
+        if kind in ['area', 'pie']:  # abs() makes the values non-negative
+            self.s = self.s.abs()
+
+    def time_series_plot(self, kind):
+        self.s.plot(kind=kind)  # timed call
+
+
+class FramePlotting(object):  # times DataFrame.plot for each plot kind
+    params = [['line', 'bar', 'area', 'barh', 'hist', 'kde', 'pie', 'scatter',
+               'hexbin']]
+    param_names = ['kind']
+
+    def setup(self, kind):
+        if kind in ['bar', 'barh', 'pie']:  # smallest input for these kinds
+            n = 100
+        elif kind in ['kde', 'scatter', 'hexbin']:
+            n = 10000
+        else:  # remaining kinds: 'line', 'area', 'hist'
+            n = 1000000
+
+        self.x = Series(np.random.randn(n))
+        self.y = Series(np.random.randn(n))
+        if kind in ['area', 'pie']:  # abs() makes both columns non-negative
+            self.x = self.x.abs()
+            self.y = self.y.abs()
+        self.df = DataFrame({'x': self.x, 'y': self.y})
+
+    def time_frame_plot(self, kind):
+        self.df.plot(x='x', y='y', kind=kind)  # timed call
 
 
 class TimeseriesPlotting(object):

diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py
@@ -146,4 +146,42 @@ def time_get_dummies_1d_sparse(self):
         pd.get_dummies(self.s, sparse=True)
 
 
+class Cut(object):  # asv timings for pd.cut and pd.qcut
+    params = [[4, 10, 1000]]
+    param_names = ['bins']
+
+    def setup(self, bins):  # `bins` is not used while building the inputs
+        N = 10**5
+        self.int_series = pd.Series(np.arange(N).repeat(5))  # 5*N values
+        self.float_series = pd.Series(np.random.randn(N).repeat(5))
+        self.timedelta_series = pd.Series(np.random.randint(N, size=N),
+                                          dtype='timedelta64[ns]')
+        self.datetime_series = pd.Series(np.random.randint(N, size=N),
+                                         dtype='datetime64[ns]')
+
+    def time_cut_int(self, bins):
+        pd.cut(self.int_series, bins)
+
+    def time_cut_float(self, bins):
+        pd.cut(self.float_series, bins)
+
+    def time_cut_timedelta(self, bins):
+        pd.cut(self.timedelta_series, bins)
+
+    def time_cut_datetime(self, bins):
+        pd.cut(self.datetime_series, bins)
+
+    def time_qcut_int(self, bins):  # bins is qcut's 2nd positional argument
+        pd.qcut(self.int_series, bins)
+
+    def time_qcut_float(self, bins):
+        pd.qcut(self.float_series, bins)
+
+    def time_qcut_timedelta(self, bins):
+        pd.qcut(self.timedelta_series, bins)
+
+    def time_qcut_datetime(self, bins):
+        pd.qcut(self.datetime_series, bins)
+
+
 from .pandas_vb_common import setup  # noqa: F401
diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py
@@ -21,6 +21,42 @@ def time_rolling(self, constructor, window, dtype, method):
         getattr(self.roll, method)()
 
 
+class ExpandingMethods(object):  # times expanding-window aggregations
+
+    sample_time = 0.2
+    params = (['DataFrame', 'Series'],
+              ['int', 'float'],
+              ['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt',
+               'sum'])
+    param_names = ['constructor', 'dtype', 'method']  # one name per axis
+
+    def setup(self, constructor, dtype, method):  # method unused in setup
+        N = 10**5
+        arr = (100 * np.random.random(N)).astype(dtype)
+        self.expanding = getattr(pd, constructor)(arr).expanding()
+
+    def time_expanding(self, constructor, dtype, method):
+        getattr(self.expanding, method)()  # e.g. self.expanding.mean()
+
+
+class EWMMethods(object):  # times exponentially weighted aggregations
+
+    sample_time = 0.2
+    params = (['DataFrame', 'Series'],
+              [10, 1000],
+              ['int', 'float'],
+              ['mean', 'std'])
+    param_names = ['constructor', 'window', 'dtype', 'method']
+
+    def setup(self, constructor, window, dtype, method):  # window->halflife
+        N = 10**5
+        arr = (100 * np.random.random(N)).astype(dtype)
+        self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)
+
+    def time_ewm(self, constructor, window, dtype, method):
+        getattr(self.ewm, method)()  # e.g. self.ewm.mean()
+
+
 class VariableWindowMethods(Methods):
     sample_time = 0.2
     params = (['DataFrame', 'Series'],

diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py
@@ -96,14 +96,42 @@ def time_average_old(self, constructor, pct):
 
 class Correlation(object):
 
-    params = ['spearman', 'kendall', 'pearson']
-    param_names = ['method']
+    params = [['spearman', 'kendall', 'pearson'], [True, False]]
+    param_names = ['method', 'use_bottleneck']
 
-    def setup(self, method):
+    def setup(self, method, use_bottleneck):
+        try:  # NOTE(review): option is set globally, never reset here
+            pd.options.compute.use_bottleneck = use_bottleneck
+        except TypeError:  # fall back to setting the nanops module flag
+            from pandas.core import nanops
+            nanops._USE_BOTTLENECK = use_bottleneck
         self.df = pd.DataFrame(np.random.randn(1000, 30))
+        self.s = pd.Series(np.random.randn(1000))  # inputs for Series.corr
+        self.s2 = pd.Series(np.random.randn(1000))
 
-    def time_corr(self, method):
+    def time_corr(self, method, use_bottleneck):
         self.df.corr(method=method)
 
+    def time_corr_series(self, method, use_bottleneck):
+        self.s.corr(self.s2, method=method)  # Series-vs-Series correlation
+
+
+class Covariance(object):  # times Series.cov with bottleneck on and off
+
+    params = [[True, False]]
+    param_names = ['use_bottleneck']
+
+    def setup(self, use_bottleneck):
+        try:  # NOTE(review): option is set globally, never reset here
+            pd.options.compute.use_bottleneck = use_bottleneck
+        except TypeError:  # fall back to setting the nanops module flag
+            from pandas.core import nanops
+            nanops._USE_BOTTLENECK = use_bottleneck
+        self.s = pd.Series(np.random.randn(100000))
+        self.s2 = pd.Series(np.random.randn(100000))
+
+    def time_cov_series(self, use_bottleneck):
+        self.s.cov(self.s2)  # timed call
+
 
 from .pandas_vb_common import setup  # noqa: F401
diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py
@@ -26,21 +26,42 @@ def time_extract(self):
     def time_findall(self):
         self.s.str.findall('[A-Z]+')
 
+    def time_find(self):  # NOTE(review): is '[A-Z]+' meant as a literal?
+        self.s.str.find('[A-Z]+')
+
+    def time_rfind(self):  # NOTE(review): is '[A-Z]+' meant as a literal?
+        self.s.str.rfind('[A-Z]+')
+
     def time_get(self):
         self.s.str.get(0)
 
     def time_len(self):
         self.s.str.len()
 
+    def time_join(self):
+        self.s.str.join(' ')  # single-space separator
+
     def time_match(self):
         self.s.str.match('A')
 
+    def time_normalize(self):
+        self.s.str.normalize('NFC')  # normalization form 'NFC'
+
     def time_pad(self):
         self.s.str.pad(100, side='both')
 
+    def time_partition(self):
+        self.s.str.partition('A')  # separator 'A'
+
+    def time_rpartition(self):
+        self.s.str.rpartition('A')  # separator 'A'
+
     def time_replace(self):
         self.s.str.replace('A', '\x01\x01')
 
+    def time_translate(self):  # table keyed by ordinal so 'A' is replaced
+        self.s.str.translate({ord('A'): '\x01\x01'})
+
     def time_slice(self):
         self.s.str.slice(5, 15, 2)
 
@@ -65,6 +86,12 @@ def time_upper(self):
     def time_lower(self):
         self.s.str.lower()
 
+    def time_wrap(self):
+        self.s.str.wrap(10)  # width argument 10
+
+    def time_zfill(self):
+        self.s.str.zfill(10)  # width argument 10
+
 
 class Repeat(object):
 
@@ -129,6 +156,9 @@ def setup(self, expand):
     def time_split(self, expand):
         self.s.str.split('--', expand=expand)
 
+    def time_rsplit(self, expand):
+        self.s.str.rsplit('--', expand=expand)  # same args as time_split
+
 
 class Dummies(object):
 

diff --git a/asv_bench/benchmarks/timedelta.py b/asv_bench/benchmarks/timedelta.py
@@ -1,7 +1,8 @@
 import datetime
 
 import numpy as np
-from pandas import Series, timedelta_range, to_timedelta, Timestamp, Timedelta
+from pandas import Series, timedelta_range, to_timedelta, Timestamp, \
+    Timedelta, TimedeltaIndex, DataFrame
 
 
 class TimedeltaConstructor(object):
@@ -116,3 +117,36 @@ def time_timedelta_microseconds(self, series):
 
     def time_timedelta_nanoseconds(self, series):
         series.dt.nanoseconds
+
+
+class TimedeltaIndexing(object):  # TimedeltaIndex lookup/set-op timings
+
+    def setup(self):  # use timedelta_range; ctor range args are deprecated
+        self.index = timedelta_range(start='1985', periods=1000, freq='D')
+        self.index2 = timedelta_range(start='1986', periods=1000, freq='D')
+        self.series = Series(range(1000), index=self.index)
+        self.timedelta = self.index[500]  # a label present in self.index
+
+    def time_get_loc(self):
+        self.index.get_loc(self.timedelta)
+
+    def time_shape(self):
+        self.index.shape
+
+    def time_shallow_copy(self):
+        self.index._shallow_copy()  # private Index helper
+
+    def time_series_loc(self):
+        self.series.loc[self.timedelta]
+
+    def time_align(self):  # frame from a full and a half-length series
+        DataFrame({'a': self.series, 'b': self.series[:500]})
+
+    def time_intersection(self):
+        self.index.intersection(self.index2)
+
+    def time_union(self):
+        self.index.union(self.index2)
+
+    def time_unique(self):
+        self.index.unique()