Skip to content

Commit 5413599

Browse files
committed
Merge remote-tracking branch 'upstream/master' into mcmali-s3-pub-test
2 parents 9b92536 + c15f080 commit 5413599

File tree

235 files changed

+5796
-3289
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

235 files changed

+5796
-3289
lines changed

.travis.yml

-6
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,6 @@ matrix:
5858
services:
5959
- mysql
6060
- postgresql
61-
62-
- env:
63-
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
64-
services:
65-
- mysql
66-
- postgresql
6761
allow_failures:
6862
- arch: arm64
6963
env:

asv_bench/benchmarks/categoricals.py

+4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def setup(self):
3434
self.values_all_int8 = np.ones(N, "int8")
3535
self.categorical = pd.Categorical(self.values, self.categories)
3636
self.series = pd.Series(self.categorical)
37+
self.intervals = pd.interval_range(0, 1, periods=N // 10)
3738

3839
def time_regular(self):
3940
pd.Categorical(self.values, self.categories)
@@ -44,6 +45,9 @@ def time_fastpath(self):
4445
def time_datetimes(self):
4546
pd.Categorical(self.datetimes)
4647

48+
def time_interval(self):
    """Time construction of a Categorical from interval data.

    Uses ``self.intervals`` (created in ``setup`` via ``pd.interval_range``)
    as both the values and the categories. The previous body passed
    ``self.datetimes``, so the interval fixture was never exercised.
    """
    pd.Categorical(self.intervals, categories=self.intervals)
50+
4751
def time_datetimes_with_nat(self):
4852
pd.Categorical(self.datetimes_with_nat)
4953

asv_bench/benchmarks/groupby.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from .pandas_vb_common import tm
1818

19-
method_blacklist = {
19+
method_blocklist = {
2020
"object": {
2121
"median",
2222
"prod",
@@ -403,7 +403,7 @@ class GroupByMethods:
403403
]
404404

405405
def setup(self, dtype, method, application):
406-
if method in method_blacklist.get(dtype, {}):
406+
if method in method_blocklist.get(dtype, {}):
407407
raise NotImplementedError # skip benchmark
408408
ngroups = 1000
409409
size = ngroups * 2

asv_bench/benchmarks/io/json.py

+30
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import sys
2+
13
import numpy as np
24

35
from pandas import DataFrame, concat, date_range, read_json, timedelta_range
@@ -82,6 +84,7 @@ def setup(self, orient, frame):
8284
timedeltas = timedelta_range(start=1, periods=N, freq="s")
8385
datetimes = date_range(start=1, periods=N, freq="s")
8486
ints = np.random.randint(100000000, size=N)
87+
longints = sys.maxsize * np.random.randint(100000000, size=N)
8588
floats = np.random.randn(N)
8689
strings = tm.makeStringIndex(N)
8790
self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N))
@@ -120,6 +123,18 @@ def setup(self, orient, frame):
120123
index=index,
121124
)
122125

126+
self.df_longint_float_str = DataFrame(
127+
{
128+
"longint_1": longints,
129+
"longint_2": longints,
130+
"float_1": floats,
131+
"float_2": floats,
132+
"str_1": strings,
133+
"str_2": strings,
134+
},
135+
index=index,
136+
)
137+
123138
def time_to_json(self, orient, frame):
124139
getattr(self, frame).to_json(self.fname, orient=orient)
125140

@@ -172,6 +187,7 @@ def setup(self):
172187
timedeltas = timedelta_range(start=1, periods=N, freq="s")
173188
datetimes = date_range(start=1, periods=N, freq="s")
174189
ints = np.random.randint(100000000, size=N)
190+
longints = sys.maxsize * np.random.randint(100000000, size=N)
175191
floats = np.random.randn(N)
176192
strings = tm.makeStringIndex(N)
177193
self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N))
@@ -209,6 +225,17 @@ def setup(self):
209225
},
210226
index=index,
211227
)
228+
self.df_longint_float_str = DataFrame(
229+
{
230+
"longint_1": longints,
231+
"longint_2": longints,
232+
"float_1": floats,
233+
"float_2": floats,
234+
"str_1": strings,
235+
"str_2": strings,
236+
},
237+
index=index,
238+
)
212239

213240
def time_floats_with_int_idex_lines(self):
214241
self.df.to_json(self.fname, orient="records", lines=True)
@@ -225,6 +252,9 @@ def time_float_int_lines(self):
225252
def time_float_int_str_lines(self):
226253
self.df_int_float_str.to_json(self.fname, orient="records", lines=True)
227254

255+
def time_float_longint_str_lines(self):
256+
self.df_longint_float_str.to_json(self.fname, orient="records", lines=True)
257+
228258

229259
class ToJSONMem:
230260
def setup_cache(self):

asv_bench/benchmarks/rolling.py

+7
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,18 @@ class EWMMethods:
9191
def setup(self, constructor, window, dtype, method):
9292
N = 10 ** 5
9393
arr = (100 * np.random.random(N)).astype(dtype)
94+
times = pd.date_range("1900", periods=N, freq="23s")
9495
self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)
96+
self.ewm_times = getattr(pd, constructor)(arr).ewm(
97+
halflife="1 Day", times=times
98+
)
9599

96100
def time_ewm(self, constructor, window, dtype, method):
97101
getattr(self.ewm, method)()
98102

103+
def time_ewm_times(self, constructor, window, dtype, method):
    """Time ``mean`` on the EWM object that was built with ``times=``.

    ``setup`` stores that object as ``self.ewm_times``; the previous body
    called ``self.ewm.mean()``, so the ``times`` code path was never measured.
    The parameters are supplied by the benchmark parametrization and are not
    used here.
    """
    self.ewm_times.mean()
105+
99106

100107
class VariableWindowMethods(Methods):
101108
params = (

asv_bench/benchmarks/tslibs/fields.py

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import numpy as np
2+
3+
from pandas._libs.tslibs.fields import (
4+
get_date_field,
5+
get_start_end_field,
6+
get_timedelta_field,
7+
)
8+
9+
from .tslib import _sizes
10+
11+
12+
class TimeGetTimedeltaField:
    """Benchmark ``get_timedelta_field`` over array sizes and field codes."""

    param_names = ["size", "field"]
    params = [
        _sizes,
        ["days", "h", "s", "seconds", "ms", "microseconds", "us", "ns", "nanoseconds"],
    ]

    def setup(self, size, field):
        # Random int64 values in [0, 10) act as the i8 input buffer.
        self.i8data = np.random.randint(0, 10, size=size, dtype="i8")

    def time_get_timedelta_field(self, size, field):
        # Only the call itself is timed; the data comes from ``setup``.
        get_timedelta_field(self.i8data, field)
25+
26+
27+
class TimeGetDateField:
    """Benchmark ``get_date_field`` over array sizes and field codes."""

    param_names = ["size", "field"]
    params = [
        _sizes,
        [
            "Y", "M", "D", "h", "m", "s", "us", "ns",
            "doy", "dow", "woy", "q", "dim", "is_leap_year",
        ],
    ]

    def setup(self, size, field):
        # Random int64 values in [0, 10) act as the i8 input buffer.
        self.i8data = np.random.randint(0, 10, size=size, dtype="i8")

    def time_get_date_field(self, size, field):
        # Only the call itself is timed; the data comes from ``setup``.
        get_date_field(self.i8data, field)
55+
56+
57+
class TimeGetStartEndField:
    """Benchmark ``get_start_end_field`` over sizes, sides, periods and freqs."""

    param_names = ["size", "side", "period", "freqstr", "month_kw"]
    params = [
        _sizes,
        ["start", "end"],
        ["month", "quarter", "year"],
        ["B", None, "QS"],
        [12, 3, 5],
    ]

    def setup(self, size, side, period, freqstr, month_kw):
        # Field name is assembled from the parameters, e.g. "is_month_start".
        self.attrname = f"is_{period}_{side}"
        # Random int64 values in [0, 10) act as the i8 input buffer.
        self.i8data = np.random.randint(0, 10, size=size, dtype="i8")

    def time_get_start_end_field(self, size, side, period, freqstr, month_kw):
        # Only the call itself is timed; inputs were prepared in ``setup``.
        get_start_end_field(self.i8data, self.attrname, freqstr, month_kw=month_kw)
+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""
2+
ipython analogue:
3+
4+
tr = TimeResolution()
5+
mi = pd.MultiIndex.from_product(tr.params[:-1] + ([str(x) for x in tr.params[-1]],))
6+
df = pd.DataFrame(np.nan, index=mi, columns=["mean", "stdev"])
7+
8+
for unit in tr.params[0]:
9+
for size in tr.params[1]:
10+
for tz in tr.params[2]:
11+
tr.setup(unit, size, tz)
12+
key = (unit, size, str(tz))
13+
print(key)
14+
15+
val = %timeit -o tr.time_get_resolution(unit, size, tz)
16+
17+
df.loc[key] = (val.average, val.stdev)
18+
19+
"""
20+
from datetime import timedelta, timezone
21+
22+
from dateutil.tz import gettz, tzlocal
23+
import numpy as np
24+
import pytz
25+
26+
try:
27+
from pandas._libs.tslibs import get_resolution
28+
except ImportError:
29+
from pandas._libs.tslibs.resolution import get_resolution
30+
31+
32+
class TimeResolution:
    """Benchmark ``get_resolution`` over datetime units, sizes and tzinfos."""

    param_names = ["unit", "size", "tz"]
    params = (
        ["D", "h", "m", "s", "us", "ns"],
        [1, 100, 10 ** 4, 10 ** 6],
        [
            None,
            timezone.utc,
            timezone(timedelta(minutes=60)),
            pytz.timezone("US/Pacific"),
            gettz("Asia/Tokyo"),
            tzlocal(),
        ],
    )

    def setup(self, unit, size, tz):
        # Random int64 values in [0, 10), reinterpreted as datetime64[unit].
        raw = np.random.randint(0, 10, size=size, dtype="i8")
        as_unit = raw.view(f"M8[{unit}]")
        # Convert to nanosecond resolution and take the int64 view again.
        as_nanos = as_unit.astype("M8[ns]")
        self.i8data = as_nanos.view("i8")

    def time_get_resolution(self, unit, size, tz):
        # Only the call itself is timed; the data comes from ``setup``.
        get_resolution(self.i8data, tz)

asv_bench/benchmarks/tslibs/timestamp.py

+21-10
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,29 @@
1-
import datetime
1+
from datetime import datetime, timedelta, timezone
22

3-
import dateutil
3+
from dateutil.tz import gettz, tzlocal, tzutc
44
import numpy as np
55
import pytz
66

77
from pandas import Timestamp
88

9+
# One case for each type of tzinfo object that has its own code path
10+
# in tzconversion code.
11+
_tzs = [
12+
None,
13+
pytz.timezone("Europe/Amsterdam"),
14+
gettz("US/Central"),
15+
pytz.UTC,
16+
tzutc(),
17+
timezone(timedelta(minutes=60)),
18+
tzlocal(),
19+
]
20+
921

1022
class TimestampConstruction:
1123
def setup(self):
1224
self.npdatetime64 = np.datetime64("2020-01-01 00:00:00")
13-
self.dttime_unaware = datetime.datetime(2020, 1, 1, 0, 0, 0)
14-
self.dttime_aware = datetime.datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC)
25+
self.dttime_unaware = datetime(2020, 1, 1, 0, 0, 0)
26+
self.dttime_aware = datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC)
1527
self.ts = Timestamp("2020-01-01 00:00:00")
1628

1729
def time_parse_iso8601_no_tz(self):
@@ -49,7 +61,6 @@ def time_from_pd_timestamp(self):
4961

5062

5163
class TimestampProperties:
52-
_tzs = [None, pytz.timezone("Europe/Amsterdam"), pytz.UTC, dateutil.tz.tzutc()]
5364
_freqs = [None, "B"]
5465
params = [_tzs, _freqs]
5566
param_names = ["tz", "freq"]
@@ -63,9 +74,6 @@ def time_tz(self, tz, freq):
6374
def time_dayofweek(self, tz, freq):
6475
self.ts.dayofweek
6576

66-
def time_weekday_name(self, tz, freq):
67-
self.ts.day_name
68-
6977
def time_dayofyear(self, tz, freq):
7078
self.ts.dayofyear
7179

@@ -108,9 +116,12 @@ def time_microsecond(self, tz, freq):
108116
def time_month_name(self, tz, freq):
109117
self.ts.month_name()
110118

119+
def time_weekday_name(self, tz, freq):
120+
self.ts.day_name()
121+
111122

112123
class TimestampOps:
113-
params = [None, "US/Eastern", pytz.UTC, dateutil.tz.tzutc()]
124+
params = _tzs
114125
param_names = ["tz"]
115126

116127
def setup(self, tz):
@@ -148,7 +159,7 @@ def time_ceil(self, tz):
148159

149160
class TimestampAcrossDst:
150161
def setup(self):
151-
dt = datetime.datetime(2016, 3, 27, 1)
162+
dt = datetime(2016, 3, 27, 1)
152163
self.tzinfo = pytz.timezone("CET").localize(dt, is_dst=False).tzinfo
153164
self.ts2 = Timestamp(dt)
154165

asv_bench/benchmarks/tslibs/tslib.py

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
"""
2+
ipython analogue:
3+
4+
tr = TimeIntsToPydatetime()
5+
mi = pd.MultiIndex.from_product(
6+
tr.params[:-1] + ([str(x) for x in tr.params[-1]],)
7+
)
8+
df = pd.DataFrame(np.nan, index=mi, columns=["mean", "stdev"])
9+
for box in tr.params[0]:
10+
for size in tr.params[1]:
11+
for tz in tr.params[2]:
12+
tr.setup(box, size, tz)
13+
key = (box, size, str(tz))
14+
print(key)
15+
val = %timeit -o tr.time_ints_to_pydatetime(box, size, tz)
16+
df.loc[key] = (val.average, val.stdev)
17+
"""
18+
from datetime import timedelta, timezone
19+
20+
from dateutil.tz import gettz, tzlocal
21+
import numpy as np
22+
import pytz
23+
24+
try:
25+
from pandas._libs.tslibs import ints_to_pydatetime
26+
except ImportError:
27+
from pandas._libs.tslib import ints_to_pydatetime
28+
29+
_tzs = [
30+
None,
31+
timezone.utc,
32+
timezone(timedelta(minutes=60)),
33+
pytz.timezone("US/Pacific"),
34+
gettz("Asia/Tokyo"),
35+
tzlocal(),
36+
]
37+
_sizes = [0, 1, 100, 10 ** 4, 10 ** 6]
38+
39+
40+
class TimeIntsToPydatetime:
    """Benchmark ``ints_to_pydatetime`` over box kinds, sizes and tzinfos."""

    param_names = ["box", "size", "tz"]
    params = (
        ["time", "date", "datetime", "timestamp"],
        _sizes,
        _tzs,
    )
    # TODO: fold? freq?

    def setup(self, box, size, tz):
        # Random int64 values in [0, 10) act as the i8 input buffer.
        self.i8data = np.random.randint(0, 10, size=size, dtype="i8")

    def time_ints_to_pydatetime(self, box, size, tz):
        # ints_to_pydatetime does not allow non-None tz with date;
        # forcing tz to None there means some benchmarks are duplicates.
        effective_tz = None if box == "date" else tz
        ints_to_pydatetime(self.i8data, effective_tz, box=box)

0 commit comments

Comments
 (0)