Skip to content

Commit 5dc5004

Browse files
committed
Merge remote-tracking branch 'upstream/master' into pd.Series.map_performance
2 parents 17a0514 + 04e9e0a commit 5dc5004

File tree

311 files changed

+6522
-3196
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

311 files changed

+6522
-3196
lines changed

.travis.yml

-6
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,6 @@ matrix:
5858
services:
5959
- mysql
6060
- postgresql
61-
62-
- env:
63-
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
64-
services:
65-
- mysql
66-
- postgresql
6761
allow_failures:
6862
- arch: arm64
6963
env:

LICENSES/XARRAY_LICENSE

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
Copyright 2014-2019, xarray Developers
2+
3+
--------------------------------------------------------------------------------
4+
15
Apache License
26
Version 2.0, January 2004
37
http://www.apache.org/licenses/

asv_bench/asv.conf.json

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
"xlwt": [],
5454
"odfpy": [],
5555
"pytest": [],
56+
"jinja2": [],
5657
// If using Windows with python 2.7 and want to build using the
5758
// mingw toolchain (rather than MSVC), uncomment the following line.
5859
// "libpython": [],

asv_bench/benchmarks/io/style.py

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import numpy as np
2+
3+
from pandas import DataFrame
4+
5+
6+
class RenderApply:
    """ASV benchmarks for a ``Styler`` that has a row-wise ``apply`` attached.

    Parametrized over the number of columns and rows of the styled frame.
    """

    params = [[12, 24, 36], [12, 120]]
    param_names = ["cols", "rows"]

    def setup(self, cols, rows):
        # Random float frame with labelled axes; the labels matter because
        # the styling function keys off the row name.
        values = np.random.randn(rows, cols)
        column_labels = [f"float_{i+1}" for i in range(cols)]
        row_labels = [f"row_{i+1}" for i in range(rows)]
        self.df = DataFrame(values, columns=column_labels, index=row_labels)
        # Populate ``self.st`` so the render benchmarks have a Styler to use.
        self._style_apply()

    def time_render(self, cols, rows):
        self.st.render()

    def peakmem_apply(self, cols, rows):
        self._style_apply()

    def peakmem_render(self, cols, rows):
        self.st.render()

    def _style_apply(self):
        """Attach the highlighting function to ``self.df.style`` as ``self.st``."""

        def _apply_func(s):
            # One CSS string per element of ``s``: highlighted when ``s`` is
            # named "row_1", empty otherwise.
            return [
                "background-color: lightcyan" if s.name == "row_1" else "" for _ in s
            ]

        self.st = self.df.style.apply(_apply_func, axis=1)

asv_bench/benchmarks/rolling.py

+7
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,18 @@ class EWMMethods:
9191
def setup(self, constructor, window, dtype, method):
9292
N = 10 ** 5
9393
arr = (100 * np.random.random(N)).astype(dtype)
94+
times = pd.date_range("1900", periods=N, freq="23s")
9495
self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)
96+
self.ewm_times = getattr(pd, constructor)(arr).ewm(
97+
halflife="1 Day", times=times
98+
)
9599

96100
def time_ewm(self, constructor, window, dtype, method):
97101
getattr(self.ewm, method)()
98102

103+
def time_ewm_times(self, constructor, window, dtype, method):
    # Benchmark the mean of the EWM object that ``setup`` builds with
    # ``halflife="1 Day"`` and ``times=``. Using ``self.ewm`` here would
    # re-measure the plain EWM already covered by ``time_ewm`` and leave
    # the times-aware code path unbenchmarked.
    self.ewm_times.mean()
105+
99106

100107
class VariableWindowMethods(Methods):
101108
params = (

asv_bench/benchmarks/tslibs/fields.py

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import numpy as np
2+
3+
from pandas._libs.tslibs.fields import (
4+
get_date_field,
5+
get_start_end_field,
6+
get_timedelta_field,
7+
)
8+
9+
from .tslib import _sizes
10+
11+
12+
class TimeGetTimedeltaField:
    """Benchmark ``get_timedelta_field`` for each array size and field name."""

    params = [
        _sizes,
        [
            "days",
            "h",
            "s",
            "seconds",
            "ms",
            "microseconds",
            "us",
            "ns",
            "nanoseconds",
        ],
    ]
    param_names = ["size", "field"]

    def setup(self, size, field):
        # int64 array of ``size`` random values drawn from [0, 10).
        self.i8data = np.random.randint(0, 10, size=size, dtype="i8")

    def time_get_timedelta_field(self, size, field):
        get_timedelta_field(self.i8data, field)
25+
26+
27+
class TimeGetDateField:
    """Benchmark ``get_date_field`` for each array size and field code."""

    params = [
        _sizes,
        [
            "Y",
            "M",
            "D",
            "h",
            "m",
            "s",
            "us",
            "ns",
            "doy",
            "dow",
            "woy",
            "q",
            "dim",
            "is_leap_year",
        ],
    ]
    param_names = ["size", "field"]

    def setup(self, size, field):
        # int64 array of ``size`` random values drawn from [0, 10).
        self.i8data = np.random.randint(0, 10, size=size, dtype="i8")

    def time_get_date_field(self, size, field):
        get_date_field(self.i8data, field)
55+
56+
57+
class TimeGetStartEndField:
    """Benchmark ``get_start_end_field`` over side/period/freqstr/month_kw."""

    params = [
        _sizes,
        ["start", "end"],
        ["month", "quarter", "year"],
        ["B", None, "QS"],
        [12, 3, 5],
    ]
    param_names = ["size", "side", "period", "freqstr", "month_kw"]

    def setup(self, size, side, period, freqstr, month_kw):
        # Field name passed to the function, e.g. "is_month_start".
        self.attrname = f"is_{period}_{side}"
        # int64 array of ``size`` random values drawn from [0, 10).
        self.i8data = np.random.randint(0, 10, size=size, dtype="i8")

    def time_get_start_end_field(self, size, side, period, freqstr, month_kw):
        get_start_end_field(self.i8data, self.attrname, freqstr, month_kw=month_kw)
+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
try:
2+
from pandas._libs.tslibs import normalize_i8_timestamps, is_date_array_normalized
3+
except ImportError:
4+
from pandas._libs.tslibs.conversion import (
5+
normalize_i8_timestamps,
6+
is_date_array_normalized,
7+
)
8+
9+
import pandas as pd
10+
11+
from .tslib import _sizes, _tzs
12+
13+
14+
class Normalize:
    """Benchmark timestamp normalization helpers for each size and timezone."""

    params = [_sizes, _tzs]
    param_names = ["size", "tz"]

    def setup(self, size, tz):
        # Use an array for which is_date_array_normalized gives True, so
        # that we do not short-circuit early: ten daily timestamps starting
        # 2016-01-01, each repeated ``size // 10`` times.
        base = pd.date_range("2016-01-01", periods=10, tz=tz)
        repeated = base.repeat(size // 10)
        self.i8data = repeated.asi8

    def time_normalize_i8_timestamps(self, size, tz):
        normalize_i8_timestamps(self.i8data, tz)

    def time_is_date_array_normalized(self, size, tz):
        # TODO: cases with different levels of short-circuiting
        is_date_array_normalized(self.i8data, tz)

asv_bench/benchmarks/tslibs/period.py

+61-1
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,20 @@
22
Period benchmarks that rely only on tslibs. See benchmarks.period for
33
Period benchmarks that rely on other parts of pandas.
44
"""
5-
from pandas import Period
5+
6+
import numpy as np
7+
8+
from pandas._libs.tslibs.period import Period, periodarr_to_dt64arr
69

710
from pandas.tseries.frequencies import to_offset
811

12+
from .tslib import _sizes, _tzs
13+
14+
try:
15+
from pandas._libs.tslibs.vectorized import dt64arr_to_periodarr
16+
except ImportError:
17+
from pandas._libs.tslibs.period import dt64arr_to_periodarr
18+
919

1020
class PeriodProperties:
1121

@@ -68,3 +78,53 @@ def setup(self, freq, is_offset):
6878

6979
def time_period_constructor(self, freq, is_offset):
7080
Period("2012-06-01", freq=freq)
81+
82+
83+
_freq_ints = [
84+
1000,
85+
1011, # Annual - November End
86+
2000,
87+
2011, # Quarterly - November End
88+
3000,
89+
4000,
90+
4006, # Weekly - Saturday End
91+
5000,
92+
6000,
93+
7000,
94+
8000,
95+
9000,
96+
10000,
97+
11000,
98+
12000,
99+
]
100+
101+
102+
class TimePeriodArrToDT64Arr:
    """Benchmark ``periodarr_to_dt64arr`` for each size and frequency code."""

    params = [_sizes, _freq_ints]
    param_names = ["size", "freq"]

    def setup(self, size, freq):
        # int64 values 0..9, each repeated ``size // 10`` times.
        ordinals = np.arange(10, dtype="i8")
        self.i8values = ordinals.repeat(size // 10)

    def time_periodarray_to_dt64arr(self, size, freq):
        periodarr_to_dt64arr(self.i8values, freq)
115+
116+
117+
class TimeDT64ArrToPeriodArr:
    """Benchmark ``dt64arr_to_periodarr`` for each size, frequency and tz."""

    params = [_sizes, _freq_ints, _tzs]
    param_names = ["size", "freq", "tz"]

    def setup(self, size, freq, tz):
        # int64 values 0..9, each repeated ``size // 10`` times.
        stamps = np.arange(10, dtype="i8")
        self.i8values = stamps.repeat(size // 10)

    def time_dt64arr_to_periodarr(self, size, freq, tz):
        dt64arr_to_periodarr(self.i8values, freq, tz)
+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""
2+
ipython analogue:
3+
4+
tr = TimeResolution()
5+
mi = pd.MultiIndex.from_product(tr.params[:-1] + ([str(x) for x in tr.params[-1]],))
6+
df = pd.DataFrame(np.nan, index=mi, columns=["mean", "stdev"])
7+
8+
for unit in tr.params[0]:
9+
for size in tr.params[1]:
10+
for tz in tr.params[2]:
11+
tr.setup(unit, size, tz)
12+
key = (unit, size, str(tz))
13+
print(key)
14+
15+
val = %timeit -o tr.time_get_resolution(unit, size, tz)
16+
17+
df.loc[key] = (val.average, val.stdev)
18+
19+
"""
20+
from datetime import timedelta, timezone
21+
22+
from dateutil.tz import gettz, tzlocal
23+
import numpy as np
24+
import pytz
25+
26+
try:
27+
from pandas._libs.tslibs import get_resolution
28+
except ImportError:
29+
from pandas._libs.tslibs.resolution import get_resolution
30+
31+
32+
class TimeResolution:
    """Benchmark ``get_resolution`` across source units, sizes and timezones."""

    params = (
        ["D", "h", "m", "s", "us", "ns"],
        [1, 100, 10 ** 4, 10 ** 6],
        [
            None,
            timezone.utc,
            timezone(timedelta(minutes=60)),
            pytz.timezone("US/Pacific"),
            gettz("Asia/Tokyo"),
            tzlocal(),
        ],
    )
    param_names = ["unit", "size", "tz"]

    def setup(self, unit, size, tz):
        # Draw small random integers, reinterpret them as datetime64 values
        # in the requested unit, convert to nanoseconds, and expose the
        # result as raw int64 again.
        raw = np.random.randint(0, 10, size=size, dtype="i8")
        as_unit = raw.view(f"M8[{unit}]")
        self.i8data = as_unit.astype("M8[ns]").view("i8")

    def time_get_resolution(self, unit, size, tz):
        get_resolution(self.i8data, tz)

asv_bench/benchmarks/tslibs/timestamp.py

+21-10
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,29 @@
1-
import datetime
1+
from datetime import datetime, timedelta, timezone
22

3-
import dateutil
3+
from dateutil.tz import gettz, tzlocal, tzutc
44
import numpy as np
55
import pytz
66

77
from pandas import Timestamp
88

9+
# One case for each type of tzinfo object that has its own code path
10+
# in tzconversion code.
11+
_tzs = [
12+
None,
13+
pytz.timezone("Europe/Amsterdam"),
14+
gettz("US/Central"),
15+
pytz.UTC,
16+
tzutc(),
17+
timezone(timedelta(minutes=60)),
18+
tzlocal(),
19+
]
20+
921

1022
class TimestampConstruction:
1123
def setup(self):
1224
self.npdatetime64 = np.datetime64("2020-01-01 00:00:00")
13-
self.dttime_unaware = datetime.datetime(2020, 1, 1, 0, 0, 0)
14-
self.dttime_aware = datetime.datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC)
25+
self.dttime_unaware = datetime(2020, 1, 1, 0, 0, 0)
26+
self.dttime_aware = datetime(2020, 1, 1, 0, 0, 0, 0, pytz.UTC)
1527
self.ts = Timestamp("2020-01-01 00:00:00")
1628

1729
def time_parse_iso8601_no_tz(self):
@@ -49,7 +61,6 @@ def time_from_pd_timestamp(self):
4961

5062

5163
class TimestampProperties:
52-
_tzs = [None, pytz.timezone("Europe/Amsterdam"), pytz.UTC, dateutil.tz.tzutc()]
5364
_freqs = [None, "B"]
5465
params = [_tzs, _freqs]
5566
param_names = ["tz", "freq"]
@@ -63,9 +74,6 @@ def time_tz(self, tz, freq):
6374
def time_dayofweek(self, tz, freq):
6475
self.ts.dayofweek
6576

66-
def time_weekday_name(self, tz, freq):
67-
self.ts.day_name
68-
6977
def time_dayofyear(self, tz, freq):
7078
self.ts.dayofyear
7179

@@ -108,9 +116,12 @@ def time_microsecond(self, tz, freq):
108116
def time_month_name(self, tz, freq):
109117
self.ts.month_name()
110118

119+
def time_weekday_name(self, tz, freq):
120+
self.ts.day_name()
121+
111122

112123
class TimestampOps:
113-
params = [None, "US/Eastern", pytz.UTC, dateutil.tz.tzutc()]
124+
params = _tzs
114125
param_names = ["tz"]
115126

116127
def setup(self, tz):
@@ -148,7 +159,7 @@ def time_ceil(self, tz):
148159

149160
class TimestampAcrossDst:
150161
def setup(self):
151-
dt = datetime.datetime(2016, 3, 27, 1)
162+
dt = datetime(2016, 3, 27, 1)
152163
self.tzinfo = pytz.timezone("CET").localize(dt, is_dst=False).tzinfo
153164
self.ts2 = Timestamp(dt)
154165

0 commit comments

Comments
 (0)