Skip to content

Commit 740f9e5

Browse files
committed
Merge remote-tracking branch 'upstream/master' into ea-repr
2 parents: a35399e + db8d33e; commit 740f9e5

File tree

287 files changed

+16063
-14141
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

287 files changed

+16063
-14141
lines changed

.circleci/config.yml

+8-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,13 @@ jobs:
2626
name: build
2727
command: |
2828
./ci/circle/install_circle.sh
29-
./ci/circle/show_circle.sh
29+
export PATH="$MINICONDA_DIR/bin:$PATH"
30+
source activate pandas-dev
31+
python -c "import pandas; pandas.show_versions();"
3032
- run:
3133
name: test
32-
command: ./ci/circle/run_circle.sh --skip-slow --skip-network
34+
command: |
35+
export PATH="$MINICONDA_DIR/bin:$PATH"
36+
source activate pandas-dev
37+
echo "pytest --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml --skip-slow --skip-network pandas"
38+
pytest --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml --skip-slow --skip-network pandas

.travis.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ before_script:
105105

106106
script:
107107
- echo "script start"
108+
- source activate pandas-dev
108109
- ci/run_build_docs.sh
109110
- ci/script_single.sh
110111
- ci/script_multi.sh
@@ -115,7 +116,7 @@ after_success:
115116

116117
after_script:
117118
- echo "after_script start"
118-
- source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
119+
- source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
119120
- if [ -e test-data-single.xml ]; then
120121
ci/print_skipped.py test-data-single.xml;
121122
fi

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ build: clean_pyc
1313
python setup.py build_ext --inplace
1414

1515
lint-diff:
16-
git diff master --name-only -- "*.py" | grep -E "pandas|scripts" | xargs flake8
16+
git diff upstream/master --name-only -- "*.py" | xargs flake8
1717

1818
develop: build
1919
-python setup.py develop

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ pip install pandas
171171
```
172172

173173
## Dependencies
174-
- [NumPy](https://www.numpy.org): 1.9.0 or higher
174+
- [NumPy](https://www.numpy.org): 1.12.0 or higher
175175
- [python-dateutil](https://labix.org/python-dateutil): 2.5.0 or higher
176176
- [pytz](https://pythonhosted.org/pytz): 2011k or higher
177177

asv_bench/benchmarks/binary_ops.py

+13
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ def setup(self):
5252
np.iinfo(np.int16).max,
5353
size=(N, N)))
5454

55+
self.s = Series(np.random.randn(N))
56+
5557
# Division
5658

5759
def time_frame_float_div(self):
@@ -74,6 +76,17 @@ def time_frame_int_mod(self):
7476
def time_frame_float_mod(self):
7577
self.df % self.df2
7678

79+
# Dot product
80+
81+
def time_frame_dot(self):
82+
self.df.dot(self.df2)
83+
84+
def time_series_dot(self):
85+
self.s.dot(self.s)
86+
87+
def time_frame_series_dot(self):
88+
self.df.dot(self.s)
89+
7790

7891
class Timeseries(object):
7992

asv_bench/benchmarks/frame_methods.py

+30
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,36 @@ def time_reindex_upcast(self):
6969
self.df2.reindex(np.random.permutation(range(1200)))
7070

7171

72+
class Rename(object):
73+
74+
def setup(self):
75+
N = 10**3
76+
self.df = DataFrame(np.random.randn(N * 10, N))
77+
self.idx = np.arange(4 * N, 7 * N)
78+
self.dict_idx = {k: k for k in self.idx}
79+
self.df2 = DataFrame(
80+
{c: {0: np.random.randint(0, 2, N).astype(np.bool_),
81+
1: np.random.randint(0, N, N).astype(np.int16),
82+
2: np.random.randint(0, N, N).astype(np.int32),
83+
3: np.random.randint(0, N, N).astype(np.int64)}
84+
[np.random.randint(0, 4)] for c in range(N)})
85+
86+
def time_rename_single(self):
87+
self.df.rename({0: 0})
88+
89+
def time_rename_axis0(self):
90+
self.df.rename(self.dict_idx)
91+
92+
def time_rename_axis1(self):
93+
self.df.rename(columns=self.dict_idx)
94+
95+
def time_rename_both_axes(self):
96+
self.df.rename(index=self.dict_idx, columns=self.dict_idx)
97+
98+
def time_dict_rename_both_axes(self):
99+
self.df.rename(index=self.dict_idx, columns=self.dict_idx)
100+
101+
72102
class Iteration(object):
73103

74104
def setup(self):

asv_bench/benchmarks/plotting.py

+42-11
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,48 @@
88
matplotlib.use('Agg')
99

1010

11-
class Plotting(object):
12-
13-
def setup(self):
14-
self.s = Series(np.random.randn(1000000))
15-
self.df = DataFrame({'col': self.s})
16-
17-
def time_series_plot(self):
18-
self.s.plot()
19-
20-
def time_frame_plot(self):
21-
self.df.plot()
11+
class SeriesPlotting(object):
12+
params = [['line', 'bar', 'area', 'barh', 'hist', 'kde', 'pie']]
13+
param_names = ['kind']
14+
15+
def setup(self, kind):
16+
if kind in ['bar', 'barh', 'pie']:
17+
n = 100
18+
elif kind in ['kde']:
19+
n = 10000
20+
else:
21+
n = 1000000
22+
23+
self.s = Series(np.random.randn(n))
24+
if kind in ['area', 'pie']:
25+
self.s = self.s.abs()
26+
27+
def time_series_plot(self, kind):
28+
self.s.plot(kind=kind)
29+
30+
31+
class FramePlotting(object):
32+
params = [['line', 'bar', 'area', 'barh', 'hist', 'kde', 'pie', 'scatter',
33+
'hexbin']]
34+
param_names = ['kind']
35+
36+
def setup(self, kind):
37+
if kind in ['bar', 'barh', 'pie']:
38+
n = 100
39+
elif kind in ['kde', 'scatter', 'hexbin']:
40+
n = 10000
41+
else:
42+
n = 1000000
43+
44+
self.x = Series(np.random.randn(n))
45+
self.y = Series(np.random.randn(n))
46+
if kind in ['area', 'pie']:
47+
self.x = self.x.abs()
48+
self.y = self.y.abs()
49+
self.df = DataFrame({'x': self.x, 'y': self.y})
50+
51+
def time_frame_plot(self, kind):
52+
self.df.plot(x='x', y='y', kind=kind)
2253

2354

2455
class TimeseriesPlotting(object):

asv_bench/benchmarks/reshape.py

+38
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,42 @@ def time_get_dummies_1d_sparse(self):
146146
pd.get_dummies(self.s, sparse=True)
147147

148148

149+
class Cut(object):
150+
params = [[4, 10, 1000]]
151+
param_names = ['bins']
152+
153+
def setup(self, bins):
154+
N = 10**5
155+
self.int_series = pd.Series(np.arange(N).repeat(5))
156+
self.float_series = pd.Series(np.random.randn(N).repeat(5))
157+
self.timedelta_series = pd.Series(np.random.randint(N, size=N),
158+
dtype='timedelta64[ns]')
159+
self.datetime_series = pd.Series(np.random.randint(N, size=N),
160+
dtype='datetime64[ns]')
161+
162+
def time_cut_int(self, bins):
163+
pd.cut(self.int_series, bins)
164+
165+
def time_cut_float(self, bins):
166+
pd.cut(self.float_series, bins)
167+
168+
def time_cut_timedelta(self, bins):
169+
pd.cut(self.timedelta_series, bins)
170+
171+
def time_cut_datetime(self, bins):
172+
pd.cut(self.datetime_series, bins)
173+
174+
def time_qcut_int(self, bins):
175+
pd.qcut(self.int_series, bins)
176+
177+
def time_qcut_float(self, bins):
178+
pd.qcut(self.float_series, bins)
179+
180+
def time_qcut_timedelta(self, bins):
181+
pd.qcut(self.timedelta_series, bins)
182+
183+
def time_qcut_datetime(self, bins):
184+
pd.qcut(self.datetime_series, bins)
185+
186+
149187
from .pandas_vb_common import setup # noqa: F401

asv_bench/benchmarks/rolling.py

+36
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,42 @@ def time_rolling(self, constructor, window, dtype, method):
2121
getattr(self.roll, method)()
2222

2323

24+
class ExpandingMethods(object):
25+
26+
sample_time = 0.2
27+
params = (['DataFrame', 'Series'],
28+
['int', 'float'],
29+
['median', 'mean', 'max', 'min', 'std', 'count', 'skew', 'kurt',
30+
'sum'])
31+
param_names = ['contructor', 'window', 'dtype', 'method']
32+
33+
def setup(self, constructor, dtype, method):
34+
N = 10**5
35+
arr = (100 * np.random.random(N)).astype(dtype)
36+
self.expanding = getattr(pd, constructor)(arr).expanding()
37+
38+
def time_expanding(self, constructor, dtype, method):
39+
getattr(self.expanding, method)()
40+
41+
42+
class EWMMethods(object):
43+
44+
sample_time = 0.2
45+
params = (['DataFrame', 'Series'],
46+
[10, 1000],
47+
['int', 'float'],
48+
['mean', 'std'])
49+
param_names = ['contructor', 'window', 'dtype', 'method']
50+
51+
def setup(self, constructor, window, dtype, method):
52+
N = 10**5
53+
arr = (100 * np.random.random(N)).astype(dtype)
54+
self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)
55+
56+
def time_ewm(self, constructor, window, dtype, method):
57+
getattr(self.ewm, method)()
58+
59+
2460
class VariableWindowMethods(Methods):
2561
sample_time = 0.2
2662
params = (['DataFrame', 'Series'],

asv_bench/benchmarks/stat_ops.py

+32-4
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,42 @@ def time_average_old(self, constructor, pct):
9696

9797
class Correlation(object):
9898

99-
params = ['spearman', 'kendall', 'pearson']
100-
param_names = ['method']
99+
params = [['spearman', 'kendall', 'pearson'], [True, False]]
100+
param_names = ['method', 'use_bottleneck']
101101

102-
def setup(self, method):
102+
def setup(self, method, use_bottleneck):
103+
try:
104+
pd.options.compute.use_bottleneck = use_bottleneck
105+
except TypeError:
106+
from pandas.core import nanops
107+
nanops._USE_BOTTLENECK = use_bottleneck
103108
self.df = pd.DataFrame(np.random.randn(1000, 30))
109+
self.s = pd.Series(np.random.randn(1000))
110+
self.s2 = pd.Series(np.random.randn(1000))
104111

105-
def time_corr(self, method):
112+
def time_corr(self, method, use_bottleneck):
106113
self.df.corr(method=method)
107114

115+
def time_corr_series(self, method, use_bottleneck):
116+
self.s.corr(self.s2, method=method)
117+
118+
119+
class Covariance(object):
120+
121+
params = [[True, False]]
122+
param_names = ['use_bottleneck']
123+
124+
def setup(self, use_bottleneck):
125+
try:
126+
pd.options.compute.use_bottleneck = use_bottleneck
127+
except TypeError:
128+
from pandas.core import nanops
129+
nanops._USE_BOTTLENECK = use_bottleneck
130+
self.s = pd.Series(np.random.randn(100000))
131+
self.s2 = pd.Series(np.random.randn(100000))
132+
133+
def time_cov_series(self, use_bottleneck):
134+
self.s.cov(self.s2)
135+
108136

109137
from .pandas_vb_common import setup # noqa: F401

asv_bench/benchmarks/strings.py

+30
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,42 @@ def time_extract(self):
2626
def time_findall(self):
2727
self.s.str.findall('[A-Z]+')
2828

29+
def time_find(self):
30+
self.s.str.find('[A-Z]+')
31+
32+
def time_rfind(self):
33+
self.s.str.rfind('[A-Z]+')
34+
2935
def time_get(self):
3036
self.s.str.get(0)
3137

3238
def time_len(self):
3339
self.s.str.len()
3440

41+
def time_join(self):
42+
self.s.str.join(' ')
43+
3544
def time_match(self):
3645
self.s.str.match('A')
3746

47+
def time_normalize(self):
48+
self.s.str.normalize('NFC')
49+
3850
def time_pad(self):
3951
self.s.str.pad(100, side='both')
4052

53+
def time_partition(self):
54+
self.s.str.partition('A')
55+
56+
def time_rpartition(self):
57+
self.s.str.rpartition('A')
58+
4159
def time_replace(self):
4260
self.s.str.replace('A', '\x01\x01')
4361

62+
def time_translate(self):
63+
self.s.str.translate({'A': '\x01\x01'})
64+
4465
def time_slice(self):
4566
self.s.str.slice(5, 15, 2)
4667

@@ -65,6 +86,12 @@ def time_upper(self):
6586
def time_lower(self):
6687
self.s.str.lower()
6788

89+
def time_wrap(self):
90+
self.s.str.wrap(10)
91+
92+
def time_zfill(self):
93+
self.s.str.zfill(10)
94+
6895

6996
class Repeat(object):
7097

@@ -129,6 +156,9 @@ def setup(self, expand):
129156
def time_split(self, expand):
130157
self.s.str.split('--', expand=expand)
131158

159+
def time_rsplit(self, expand):
160+
self.s.str.rsplit('--', expand=expand)
161+
132162

133163
class Dummies(object):
134164

0 commit comments

Comments
 (0)