|
1 | 1 | import operator
|
| 2 | +import warnings |
2 | 3 |
|
3 | 4 | import numpy as np
|
4 | 5 |
|
5 |
| -from pandas import DataFrame, Series, date_range |
| 6 | +import pandas as pd |
| 7 | +from pandas import DataFrame, Series, Timestamp, date_range, to_timedelta |
| 8 | +import pandas._testing as tm |
6 | 9 | from pandas.core.algorithms import checked_add_with_arr
|
7 | 10 |
|
| 11 | +from .pandas_vb_common import numeric_dtypes |
| 12 | + |
8 | 13 | try:
|
9 | 14 | import pandas.core.computation.expressions as expr
|
10 | 15 | except ImportError:
|
11 | 16 | import pandas.computation.expressions as expr
|
| 17 | +try: |
| 18 | + import pandas.tseries.holiday |
| 19 | +except ImportError: |
| 20 | + pass |
12 | 21 |
|
13 | 22 |
|
14 | 23 | class IntFrameWithScalar:
|
@@ -151,6 +160,110 @@ def time_timestamp_ops_diff_with_shift(self, tz):
|
151 | 160 | self.s - self.s.shift()
|
152 | 161 |
|
153 | 162 |
|
| 163 | +class IrregularOps: |
| 164 | + def setup(self): |
| 165 | + N = 10 ** 5 |
| 166 | + idx = date_range(start="1/1/2000", periods=N, freq="s") |
| 167 | + s = Series(np.random.randn(N), index=idx) |
| 168 | + self.left = s.sample(frac=1) |
| 169 | + self.right = s.sample(frac=1) |
| 170 | + |
| 171 | + def time_add(self): |
| 172 | + self.left + self.right |
| 173 | + |
| 174 | + |
| 175 | +class TimedeltaOps: |
| 176 | + def setup(self): |
| 177 | + self.td = to_timedelta(np.arange(1000000)) |
| 178 | + self.ts = Timestamp("2000") |
| 179 | + |
| 180 | + def time_add_td_ts(self): |
| 181 | + self.td + self.ts |
| 182 | + |
| 183 | + |
| 184 | +class CategoricalComparisons: |
| 185 | + params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"] |
| 186 | + param_names = ["op"] |
| 187 | + |
| 188 | + def setup(self, op): |
| 189 | + N = 10 ** 5 |
| 190 | + self.cat = pd.Categorical(list("aabbcd") * N, ordered=True) |
| 191 | + |
| 192 | + def time_categorical_op(self, op): |
| 193 | + getattr(self.cat, op)("b") |
| 194 | + |
| 195 | + |
| 196 | +class IndexArithmetic: |
| 197 | + |
| 198 | + params = ["float", "int"] |
| 199 | + param_names = ["dtype"] |
| 200 | + |
| 201 | + def setup(self, dtype): |
| 202 | + N = 10 ** 6 |
| 203 | + indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"} |
| 204 | + self.index = getattr(tm, indexes[dtype])(N) |
| 205 | + |
| 206 | + def time_add(self, dtype): |
| 207 | + self.index + 2 |
| 208 | + |
| 209 | + def time_subtract(self, dtype): |
| 210 | + self.index - 2 |
| 211 | + |
| 212 | + def time_multiply(self, dtype): |
| 213 | + self.index * 2 |
| 214 | + |
| 215 | + def time_divide(self, dtype): |
| 216 | + self.index / 2 |
| 217 | + |
| 218 | + def time_modulo(self, dtype): |
| 219 | + self.index % 2 |
| 220 | + |
| 221 | + |
| 222 | +class NumericInferOps: |
| 223 | + # from GH 7332 |
| 224 | + params = numeric_dtypes |
| 225 | + param_names = ["dtype"] |
| 226 | + |
| 227 | + def setup(self, dtype): |
| 228 | + N = 5 * 10 ** 5 |
| 229 | + self.df = DataFrame( |
| 230 | + {"A": np.arange(N).astype(dtype), "B": np.arange(N).astype(dtype)} |
| 231 | + ) |
| 232 | + |
| 233 | + def time_add(self, dtype): |
| 234 | + self.df["A"] + self.df["B"] |
| 235 | + |
| 236 | + def time_subtract(self, dtype): |
| 237 | + self.df["A"] - self.df["B"] |
| 238 | + |
| 239 | + def time_multiply(self, dtype): |
| 240 | + self.df["A"] * self.df["B"] |
| 241 | + |
| 242 | + def time_divide(self, dtype): |
| 243 | + self.df["A"] / self.df["B"] |
| 244 | + |
| 245 | + def time_modulo(self, dtype): |
| 246 | + self.df["A"] % self.df["B"] |
| 247 | + |
| 248 | + |
| 249 | +class DateInferOps: |
| 250 | + # from GH 7332 |
| 251 | + def setup_cache(self): |
| 252 | + N = 5 * 10 ** 5 |
| 253 | + df = DataFrame({"datetime64": np.arange(N).astype("datetime64[ms]")}) |
| 254 | + df["timedelta"] = df["datetime64"] - df["datetime64"] |
| 255 | + return df |
| 256 | + |
| 257 | + def time_subtract_datetimes(self, df): |
| 258 | + df["datetime64"] - df["datetime64"] |
| 259 | + |
| 260 | + def time_timedelta_plus_datetime(self, df): |
| 261 | + df["timedelta"] + df["datetime64"] |
| 262 | + |
| 263 | + def time_add_timedeltas(self, df): |
| 264 | + df["timedelta"] + df["timedelta"] |
| 265 | + |
| 266 | + |
154 | 267 | class AddOverflowScalar:
|
155 | 268 |
|
156 | 269 | params = [1, -1, 0]
|
@@ -188,4 +301,68 @@ def time_add_overflow_both_arg_nan(self):
|
188 | 301 | )
|
189 | 302 |
|
190 | 303 |
|
| 304 | +hcal = pd.tseries.holiday.USFederalHolidayCalendar() |
| 305 | +# These offsets currently raise a NotImplementedError with .apply_index() |
| 306 | +non_apply = [ |
| 307 | + pd.offsets.Day(), |
| 308 | + pd.offsets.BYearEnd(), |
| 309 | + pd.offsets.BYearBegin(), |
| 310 | + pd.offsets.BQuarterEnd(), |
| 311 | + pd.offsets.BQuarterBegin(), |
| 312 | + pd.offsets.BMonthEnd(), |
| 313 | + pd.offsets.BMonthBegin(), |
| 314 | + pd.offsets.CustomBusinessDay(), |
| 315 | + pd.offsets.CustomBusinessDay(calendar=hcal), |
| 316 | + pd.offsets.CustomBusinessMonthBegin(calendar=hcal), |
| 317 | + pd.offsets.CustomBusinessMonthEnd(calendar=hcal), |
| 318 | + pd.offsets.CustomBusinessMonthEnd(calendar=hcal), |
| 319 | +] |
| 320 | +other_offsets = [ |
| 321 | + pd.offsets.YearEnd(), |
| 322 | + pd.offsets.YearBegin(), |
| 323 | + pd.offsets.QuarterEnd(), |
| 324 | + pd.offsets.QuarterBegin(), |
| 325 | + pd.offsets.MonthEnd(), |
| 326 | + pd.offsets.MonthBegin(), |
| 327 | + pd.offsets.DateOffset(months=2, days=2), |
| 328 | + pd.offsets.BusinessDay(), |
| 329 | + pd.offsets.SemiMonthEnd(), |
| 330 | + pd.offsets.SemiMonthBegin(), |
| 331 | +] |
| 332 | +offsets = non_apply + other_offsets |
| 333 | + |
| 334 | + |
| 335 | +class OffsetArrayArithmetic: |
| 336 | + |
| 337 | + params = offsets |
| 338 | + param_names = ["offset"] |
| 339 | + |
| 340 | + def setup(self, offset): |
| 341 | + N = 10000 |
| 342 | + rng = pd.date_range(start="1/1/2000", periods=N, freq="T") |
| 343 | + self.rng = rng |
| 344 | + self.ser = pd.Series(rng) |
| 345 | + |
| 346 | + def time_add_series_offset(self, offset): |
| 347 | + with warnings.catch_warnings(record=True): |
| 348 | + self.ser + offset |
| 349 | + |
| 350 | + def time_add_dti_offset(self, offset): |
| 351 | + with warnings.catch_warnings(record=True): |
| 352 | + self.rng + offset |
| 353 | + |
| 354 | + |
| 355 | +class ApplyIndex: |
| 356 | + params = other_offsets |
| 357 | + param_names = ["offset"] |
| 358 | + |
| 359 | + def setup(self, offset): |
| 360 | + N = 10000 |
| 361 | + rng = pd.date_range(start="1/1/2000", periods=N, freq="T") |
| 362 | + self.rng = rng |
| 363 | + |
| 364 | + def time_apply_index(self, offset): |
| 365 | + offset.apply_index(self.rng) |
| 366 | + |
| 367 | + |
191 | 368 | from .pandas_vb_common import setup # noqa: F401 isort:skip
|
0 commit comments