|
2 | 2 |
|
3 | 3 | import numpy as np
|
4 | 4 |
|
5 |
| -from pandas import NaT, Series, date_range |
| 5 | +from pandas import Categorical, NaT, Series, date_range |
6 | 6 |
|
7 | 7 | from .pandas_vb_common import tm
|
8 | 8 |
|
@@ -36,6 +36,28 @@ def time_isin(self, dtypes):
|
36 | 36 | self.s.isin(self.values)
|
37 | 37 |
|
38 | 38 |
|
class IsInDatetime64:
    """Time ``Series.isin`` on a Series built from a ``date_range``."""

    def setup(self):
        """Build the Series under test and the lookup collections."""
        # 50-second grid running from 2015-10-26 through 2016-01-01.
        timestamps = date_range(
            start=datetime(2015, 10, 26),
            end=datetime(2016, 1, 1),
            freq="50s",
        )
        ser = Series(timestamps)
        # Every third element of the Series' backing values.
        every_third = ser._values[::3]

        self.ser = ser
        self.subset = every_third
        self.cat_subset = Categorical(every_third)

    def time_isin(self):
        """Look up against the every-third-element subset."""
        self.ser.isin(self.subset)

    def time_isin_cat_values(self):
        """Look up against the same subset wrapped in a ``Categorical``."""
        self.ser.isin(self.cat_subset)

    def time_isin_mismatched_dtype(self):
        """Look up against plain integers rather than datetimes."""
        self.ser.isin([1, 2])

    def time_isin_empty(self):
        """Look up against an empty list."""
        self.ser.isin([])
| 59 | + |
| 60 | + |
39 | 61 | class IsInFloat64:
|
40 | 62 | def setup(self):
|
41 | 63 | self.small = Series([1, 2], dtype=np.float64)
|
@@ -90,6 +112,55 @@ def time_isin_long_series_long_values_floats(self):
|
90 | 112 | self.s_long_floats.isin(self.vals_long_floats)
|
91 | 113 |
|
92 | 114 |
|
class IsInLongSeriesLookUpDominates:
    """Time ``Series.isin`` for a long Series checked against few values.

    The Series holds ``N`` elements while the lookup values are
    ``np.arange(MaxNumber)`` (5 or 1000 entries).

    ``series_type`` selects how the Series is filled:

    * ``"random_hits"``: seeded random integers in ``[0, MaxNumber)``.
    * ``"random_misses"``: the same draw shifted up by ``MaxNumber``.
    * ``"monotone_hits"``: each of ``0..MaxNumber-1`` repeated
      ``N // MaxNumber`` times, in increasing order.
    * ``"monotone_misses"``: ``np.arange(N) + MaxNumber``.
    """

    params = [
        ["int64", "int32", "float64", "float32", "object"],
        [5, 1000],
        ["random_hits", "random_misses", "monotone_hits", "monotone_misses"],
    ]
    param_names = ["dtype", "MaxNumber", "series_type"]

    # Length basis of the benchmarked Series; kept as a class attribute so it
    # can be overridden without editing ``setup``.
    N = 10 ** 7

    def setup(self, dtype, MaxNumber, series_type):
        """Create ``self.series`` and ``self.values`` for one parameter combo.

        Raises
        ------
        ValueError
            If ``series_type`` is not one of the supported names.
        """
        N = self.N
        if series_type == "random_hits":
            np.random.seed(42)
            array = np.random.randint(0, MaxNumber, N)
        elif series_type == "random_misses":
            np.random.seed(42)
            array = np.random.randint(0, MaxNumber, N) + MaxNumber
        elif series_type == "monotone_hits":
            array = np.repeat(np.arange(MaxNumber), N // MaxNumber)
        elif series_type == "monotone_misses":
            array = np.arange(N) + MaxNumber
        else:
            # Fail with a clear message instead of an unbound-local error.
            raise ValueError(f"unknown series_type: {series_type!r}")
        self.series = Series(array).astype(dtype)
        self.values = np.arange(MaxNumber).astype(dtype)

    def time_isin(self, dtypes, MaxNumber, series_type):
        """Run the lookup; the parameters are not read here."""
        self.series.isin(self.values)
| 140 | + |
| 141 | + |
class IsInLongSeriesValuesDominate:
    """Time ``Series.isin`` when the lookup values outnumber the Series.

    The Series is ``np.arange(M)`` and the lookup values hold ``N``
    entries, with ``N`` larger than ``M`` by default.

    ``series_type`` selects how the lookup values are filled:

    * ``"random"``: seeded random integers in ``[0, 10 * N)``.
    * ``"monotone"``: ``np.arange(N)``.
    """

    params = [
        ["int64", "int32", "float64", "float32", "object"],
        ["random", "monotone"],
    ]
    param_names = ["dtype", "series_type"]

    # Sizes of the lookup values (N) and of the Series (M); kept as class
    # attributes so they can be overridden without editing ``setup``.
    N = 10 ** 7
    M = 10 ** 6 + 1

    def setup(self, dtype, series_type):
        """Create ``self.values`` and ``self.series`` for one parameter combo.

        Raises
        ------
        ValueError
            If ``series_type`` is neither ``"random"`` nor ``"monotone"``.
        """
        N = self.N
        if series_type == "random":
            np.random.seed(42)
            vals = np.random.randint(0, 10 * N, N)
        elif series_type == "monotone":
            vals = np.arange(N)
        else:
            # Fail with a clear message instead of an unbound-local error.
            raise ValueError(f"unknown series_type: {series_type!r}")
        self.values = vals.astype(dtype)
        self.series = Series(np.arange(self.M)).astype(dtype)

    def time_isin(self, dtypes, series_type):
        """Run the lookup; the parameters are not read here."""
        self.series.isin(self.values)
| 162 | + |
| 163 | + |
93 | 164 | class NSort:
|
94 | 165 |
|
95 | 166 | params = ["first", "last", "all"]
|
|
0 commit comments