Skip to content

Commit d67242c

Browse files
committed
Merge remote-tracking branch 'upstream/master' into hey-everyone
2 parents f5d3922 + aa6f241 commit d67242c

File tree

210 files changed

+34806
-4984
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

210 files changed

+34806
-4984
lines changed

asv_bench/benchmarks/binary_ops.py renamed to asv_bench/benchmarks/arithmetic.py

+178-1
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,23 @@
11
import operator
2+
import warnings
23

34
import numpy as np
45

5-
from pandas import DataFrame, Series, date_range
6+
import pandas as pd
7+
from pandas import DataFrame, Series, Timestamp, date_range, to_timedelta
8+
import pandas._testing as tm
69
from pandas.core.algorithms import checked_add_with_arr
710

11+
from .pandas_vb_common import numeric_dtypes
12+
813
try:
914
import pandas.core.computation.expressions as expr
1015
except ImportError:
1116
import pandas.computation.expressions as expr
17+
try:
18+
import pandas.tseries.holiday
19+
except ImportError:
20+
pass
1221

1322

1423
class IntFrameWithScalar:
@@ -151,6 +160,110 @@ def time_timestamp_ops_diff_with_shift(self, tz):
151160
self.s - self.s.shift()
152161

153162

163+
class IrregularOps:
    """Benchmark ``Series + Series`` when both operands are shuffled copies of one Series."""

    def setup(self):
        """Create ``self.left`` and ``self.right`` as two shuffles of the same random Series."""
        size = 10 ** 5
        index = date_range(start="1/1/2000", periods=size, freq="s")
        values = np.random.randn(size)
        base = Series(values, index=index)
        # Two independent full-size samples: same labels, different row order.
        self.left = base.sample(frac=1)
        self.right = base.sample(frac=1)

    def time_add(self):
        """Time the addition of the two shuffled Series."""
        self.left + self.right
173+
174+
175+
class TimedeltaOps:
    """Benchmark adding a scalar ``Timestamp`` to a one-million-element timedelta container."""

    def setup(self):
        """Store the timedelta operand in ``self.td`` and the timestamp in ``self.ts``."""
        ticks = np.arange(1000000)
        self.td = to_timedelta(ticks)
        self.ts = Timestamp("2000")

    def time_add_td_ts(self):
        """Time ``timedeltas + timestamp``."""
        self.td + self.ts
182+
183+
184+
class CategoricalComparisons:
    """Benchmark the rich-comparison dunder methods of an ordered ``Categorical``."""

    params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
    param_names = ["op"]

    def setup(self, op):
        """Build the ordered categorical; ``op`` is accepted but not used here."""
        repeats = 10 ** 5
        letters = list("aabbcd")
        self.cat = pd.Categorical(letters * repeats, ordered=True)

    def time_categorical_op(self, op):
        """Time the comparison method named by ``op`` against the scalar ``"b"``."""
        compare = getattr(self.cat, op)
        compare("b")
194+
195+
196+
class IndexArithmetic:
    """Benchmark scalar arithmetic on float and int indexes."""

    params = ["float", "int"]
    param_names = ["dtype"]

    def setup(self, dtype):
        """Build ``self.index`` with the ``tm`` factory matching ``dtype``."""
        size = 10 ** 6
        # Map the benchmark parameter to the name of the index factory on ``tm``.
        factory_names = {"int": "makeIntIndex", "float": "makeFloatIndex"}
        make_index = getattr(tm, factory_names[dtype])
        self.index = make_index(size)

    def time_add(self, dtype):
        """Time ``index + 2``."""
        self.index + 2

    def time_subtract(self, dtype):
        """Time ``index - 2``."""
        self.index - 2

    def time_multiply(self, dtype):
        """Time ``index * 2``."""
        self.index * 2

    def time_divide(self, dtype):
        """Time ``index / 2``."""
        self.index / 2

    def time_modulo(self, dtype):
        """Time ``index % 2``."""
        self.index % 2
220+
221+
222+
class NumericInferOps:
    """Benchmark column-by-column arithmetic for each dtype in ``numeric_dtypes``."""

    # from GH 7332
    params = numeric_dtypes
    param_names = ["dtype"]

    def setup(self, dtype):
        """Build ``self.df`` with columns ``A`` and ``B``, each ``arange`` cast to ``dtype``."""
        size = 5 * 10 ** 5
        columns = {label: np.arange(size).astype(dtype) for label in ("A", "B")}
        self.df = DataFrame(columns)

    def time_add(self, dtype):
        """Time ``A + B``."""
        frame = self.df
        frame["A"] + frame["B"]

    def time_subtract(self, dtype):
        """Time ``A - B``."""
        frame = self.df
        frame["A"] - frame["B"]

    def time_multiply(self, dtype):
        """Time ``A * B``."""
        frame = self.df
        frame["A"] * frame["B"]

    def time_divide(self, dtype):
        """Time ``A / B``."""
        frame = self.df
        frame["A"] / frame["B"]

    def time_modulo(self, dtype):
        """Time ``A % B``."""
        frame = self.df
        frame["A"] % frame["B"]
247+
248+
249+
class DateInferOps:
    """Benchmark arithmetic between datetime and timedelta columns of a cached frame."""

    # from GH 7332
    def setup_cache(self):
        """Return a frame with a ``datetime64`` column and a derived ``timedelta`` column."""
        size = 5 * 10 ** 5
        stamps = np.arange(size).astype("datetime64[ms]")
        frame = DataFrame({"datetime64": stamps})
        # Subtracting the column from itself gives the timedelta column.
        frame["timedelta"] = frame["datetime64"] - frame["datetime64"]
        return frame

    def time_subtract_datetimes(self, df):
        """Time ``datetime - datetime``."""
        stamps = df["datetime64"]
        stamps - df["datetime64"]

    def time_timedelta_plus_datetime(self, df):
        """Time ``timedelta + datetime``."""
        deltas = df["timedelta"]
        deltas + df["datetime64"]

    def time_add_timedeltas(self, df):
        """Time ``timedelta + timedelta``."""
        deltas = df["timedelta"]
        deltas + df["timedelta"]
265+
266+
154267
class AddOverflowScalar:
155268

156269
params = [1, -1, 0]
@@ -188,4 +301,68 @@ def time_add_overflow_both_arg_nan(self):
188301
)
189302

190303

304+
# Holiday calendar shared by the calendar-aware custom business offsets below.
hcal = pd.tseries.holiday.USFederalHolidayCalendar()

# These offsets currently raise a NotImplementedError with .apply_index()
# Each offset is listed exactly once: a repeated entry would only run the
# same benchmark case twice.
non_apply = [
    pd.offsets.Day(),
    pd.offsets.BYearEnd(),
    pd.offsets.BYearBegin(),
    pd.offsets.BQuarterEnd(),
    pd.offsets.BQuarterBegin(),
    pd.offsets.BMonthEnd(),
    pd.offsets.BMonthBegin(),
    pd.offsets.CustomBusinessDay(),
    pd.offsets.CustomBusinessDay(calendar=hcal),
    pd.offsets.CustomBusinessMonthBegin(calendar=hcal),
    pd.offsets.CustomBusinessMonthEnd(calendar=hcal),
]

# Offsets that are also used to parametrize the ``apply_index`` benchmark.
other_offsets = [
    pd.offsets.YearEnd(),
    pd.offsets.YearBegin(),
    pd.offsets.QuarterEnd(),
    pd.offsets.QuarterBegin(),
    pd.offsets.MonthEnd(),
    pd.offsets.MonthBegin(),
    pd.offsets.DateOffset(months=2, days=2),
    pd.offsets.BusinessDay(),
    pd.offsets.SemiMonthEnd(),
    pd.offsets.SemiMonthBegin(),
]

# Complete parameter list for the offset-arithmetic benchmarks.
offsets = non_apply + other_offsets
333+
334+
335+
class OffsetArrayArithmetic:
    """Benchmark adding a single offset to a datetime Series and to a DatetimeIndex."""

    params = offsets
    param_names = ["offset"]

    def setup(self, offset):
        """Build a minute-frequency range as ``self.rng`` and a Series of it as ``self.ser``."""
        N = 10000
        # "min" is the non-deprecated spelling of the minute alias; recent
        # pandas versions warn on (and later reject) the old "T" alias.
        rng = pd.date_range(start="1/1/2000", periods=N, freq="min")
        self.rng = rng
        self.ser = pd.Series(rng)

    def time_add_series_offset(self, offset):
        """Time ``Series + offset``, capturing any warnings raised by the addition."""
        with warnings.catch_warnings(record=True):
            self.ser + offset

    def time_add_dti_offset(self, offset):
        """Time ``DatetimeIndex + offset``, capturing any warnings raised by the addition."""
        with warnings.catch_warnings(record=True):
            self.rng + offset
353+
354+
355+
class ApplyIndex:
    """Benchmark ``offset.apply_index`` on a minute-frequency ``DatetimeIndex``."""

    params = other_offsets
    param_names = ["offset"]

    def setup(self, offset):
        """Build the 10000-element minute-frequency range stored in ``self.rng``."""
        N = 10000
        # "min" is the non-deprecated spelling of the minute alias; recent
        # pandas versions warn on (and later reject) the old "T" alias.
        self.rng = pd.date_range(start="1/1/2000", periods=N, freq="min")

    def time_apply_index(self, offset):
        """Time applying ``offset`` to the whole index in one call."""
        # NOTE(review): apply_index appears to be deprecated/removed in newer
        # pandas releases -- confirm which versions this benchmark targets.
        offset.apply_index(self.rng)
366+
367+
191368
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/categoricals.py

-12
Original file line numberDiff line numberDiff line change
@@ -63,18 +63,6 @@ def time_existing_series(self):
6363
pd.Categorical(self.series)
6464

6565

66-
class CategoricalOps:
67-
params = ["__lt__", "__le__", "__eq__", "__ne__", "__ge__", "__gt__"]
68-
param_names = ["op"]
69-
70-
def setup(self, op):
71-
N = 10 ** 5
72-
self.cat = pd.Categorical(list("aabbcd") * N, ordered=True)
73-
74-
def time_categorical_op(self, op):
75-
getattr(self.cat, op)("b")
76-
77-
7866
class Concat:
7967
def setup(self):
8068
N = 10 ** 5

asv_bench/benchmarks/index_object.py

-26
Original file line numberDiff line numberDiff line change
@@ -63,32 +63,6 @@ def time_is_dates_only(self):
6363
self.dr._is_dates_only
6464

6565

66-
class Ops:
67-
68-
params = ["float", "int"]
69-
param_names = ["dtype"]
70-
71-
def setup(self, dtype):
72-
N = 10 ** 6
73-
indexes = {"int": "makeIntIndex", "float": "makeFloatIndex"}
74-
self.index = getattr(tm, indexes[dtype])(N)
75-
76-
def time_add(self, dtype):
77-
self.index + 2
78-
79-
def time_subtract(self, dtype):
80-
self.index - 2
81-
82-
def time_multiply(self, dtype):
83-
self.index * 2
84-
85-
def time_divide(self, dtype):
86-
self.index / 2
87-
88-
def time_modulo(self, dtype):
89-
self.index % 2
90-
91-
9266
class Range:
9367
def setup(self):
9468
self.idx_inc = RangeIndex(start=0, stop=10 ** 7, step=3)

asv_bench/benchmarks/inference.py

+2-47
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,8 @@
11
import numpy as np
22

3-
from pandas import DataFrame, Series, to_numeric
3+
from pandas import Series, to_numeric
44

5-
from .pandas_vb_common import lib, numeric_dtypes, tm
6-
7-
8-
class NumericInferOps:
9-
# from GH 7332
10-
params = numeric_dtypes
11-
param_names = ["dtype"]
12-
13-
def setup(self, dtype):
14-
N = 5 * 10 ** 5
15-
self.df = DataFrame(
16-
{"A": np.arange(N).astype(dtype), "B": np.arange(N).astype(dtype)}
17-
)
18-
19-
def time_add(self, dtype):
20-
self.df["A"] + self.df["B"]
21-
22-
def time_subtract(self, dtype):
23-
self.df["A"] - self.df["B"]
24-
25-
def time_multiply(self, dtype):
26-
self.df["A"] * self.df["B"]
27-
28-
def time_divide(self, dtype):
29-
self.df["A"] / self.df["B"]
30-
31-
def time_modulo(self, dtype):
32-
self.df["A"] % self.df["B"]
33-
34-
35-
class DateInferOps:
36-
# from GH 7332
37-
def setup_cache(self):
38-
N = 5 * 10 ** 5
39-
df = DataFrame({"datetime64": np.arange(N).astype("datetime64[ms]")})
40-
df["timedelta"] = df["datetime64"] - df["datetime64"]
41-
return df
42-
43-
def time_subtract_datetimes(self, df):
44-
df["datetime64"] - df["datetime64"]
45-
46-
def time_timedelta_plus_datetime(self, df):
47-
df["timedelta"] + df["datetime64"]
48-
49-
def time_add_timedeltas(self, df):
50-
df["timedelta"] + df["timedelta"]
5+
from .pandas_vb_common import lib, tm
516

527

538
class ToNumeric:

asv_bench/benchmarks/offset.py

-80
This file was deleted.

0 commit comments

Comments
 (0)