Skip to content

Commit 610073a

Browse files
authored
Merge branch 'master' into read_excel_doc
2 parents 3f9d0a1 + 15f6cdb commit 610073a

File tree

122 files changed

+3483
-4033
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

122 files changed

+3483
-4033
lines changed

.travis.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ matrix:
5151
- python-gtk2
5252
- dist: trusty
5353
env:
54-
- JOB="3.5" TEST_ARGS="--skip-slow --skip-network" COVERAGE=true
54+
- JOB="3.5_CONDA_BUILD_TEST" TEST_ARGS="--skip-slow --skip-network" CONDA_BUILD_TEST=true COVERAGE=true
5555
- dist: trusty
5656
env:
5757
- JOB="3.6" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" CONDA_FORGE=true
@@ -62,7 +62,7 @@ matrix:
6262
# In allow_failures
6363
- dist: trusty
6464
env:
65-
- JOB="3.6_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
65+
- JOB="3.6_PIP_BUILD_TEST" TEST_ARGS="--skip-slow" PIP_BUILD_TEST=true
6666
addons:
6767
apt:
6868
packages:
@@ -81,7 +81,7 @@ matrix:
8181
- JOB="2.7_SLOW" SLOW=true
8282
- dist: trusty
8383
env:
84-
- JOB="3.6_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true
84+
- JOB="3.6_PIP_BUILD_TEST" TEST_ARGS="--skip-slow" PIP_BUILD_TEST=true
8585
addons:
8686
apt:
8787
packages:

asv_bench/benchmarks/ctors.py

+28-8
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,36 @@
11
import numpy as np
2-
from pandas import DataFrame, Series, Index, DatetimeIndex, Timestamp
2+
from pandas import Series, Index, DatetimeIndex, Timestamp
33

44
from .pandas_vb_common import setup # noqa
55

66

7-
class Constructors(object):
7+
class SeriesConstructors(object):
8+
9+
goal_time = 0.2
10+
11+
param_names = ["data_fmt", "with_index"]
12+
params = [[lambda x: x,
13+
list,
14+
lambda arr: list(arr.astype(str)),
15+
lambda arr: dict(zip(range(len(arr)), arr)),
16+
lambda arr: [(i, -i) for i in arr],
17+
lambda arr: [[i, -i] for i in arr],
18+
lambda arr: ([(i, -i) for i in arr][:-1] + [None]),
19+
lambda arr: ([[i, -i] for i in arr][:-1] + [None])],
20+
[False, True]]
21+
22+
def setup(self, data_fmt, with_index):
23+
N = 10**4
24+
np.random.seed(1234)
25+
arr = np.random.randn(N)
26+
self.data = data_fmt(arr)
27+
self.index = np.arange(N) if with_index else None
28+
29+
def time_series_constructor(self, data_fmt, with_index):
30+
Series(self.data, index=self.index)
31+
32+
33+
class SeriesDtypesConstructors(object):
834

935
goal_time = 0.2
1036

@@ -19,12 +45,6 @@ def setup(self):
1945
self.s = Series([Timestamp('20110101'), Timestamp('20120101'),
2046
Timestamp('20130101')] * N * 10)
2147

22-
def time_frame_from_ndarray(self):
23-
DataFrame(self.arr)
24-
25-
def time_series_from_ndarray(self):
26-
Series(self.data, index=self.index)
27-
2848
def time_index_from_array_string(self):
2949
Index(self.arr_str)
3050

asv_bench/benchmarks/frame_ctor.py

+12
Original file line number | Diff line number | Diff line change
@@ -81,3 +81,15 @@ def setup(self, nrows):
8181
def time_frame_from_records_generator(self, nrows):
8282
# issue-6700
8383
self.df = DataFrame.from_records(self.gen, nrows=nrows)
84+
85+
86+
class FromNDArray(object):
87+
88+
goal_time = 0.2
89+
90+
def setup(self):
91+
N = 100000
92+
self.data = np.random.randn(N)
93+
94+
def time_frame_from_ndarray(self):
95+
self.df = DataFrame(self.data)

asv_bench/benchmarks/gil.py

+29-13
Original file line number | Diff line number | Diff line change
@@ -180,19 +180,35 @@ def setup(self, method):
180180
raise NotImplementedError
181181
win = 100
182182
arr = np.random.rand(100000)
183-
rolling = {'rolling_median': rolling_median,
184-
'rolling_mean': rolling_mean,
185-
'rolling_min': rolling_min,
186-
'rolling_max': rolling_max,
187-
'rolling_var': rolling_var,
188-
'rolling_skew': rolling_skew,
189-
'rolling_kurt': rolling_kurt,
190-
'rolling_std': rolling_std}
191-
192-
@test_parallel(num_threads=2)
193-
def parallel_rolling():
194-
rolling[method](arr, win)
195-
self.parallel_rolling = parallel_rolling
183+
if hasattr(DataFrame, 'rolling'):
184+
rolling = {'rolling_median': 'median',
185+
'rolling_mean': 'mean',
186+
'rolling_min': 'min',
187+
'rolling_max': 'max',
188+
'rolling_var': 'var',
189+
'rolling_skew': 'skew',
190+
'rolling_kurt': 'kurt',
191+
'rolling_std': 'std'}
192+
df = DataFrame(arr).rolling(win)
193+
194+
@test_parallel(num_threads=2)
195+
def parallel_rolling():
196+
getattr(df, rolling[method])()
197+
self.parallel_rolling = parallel_rolling
198+
else:
199+
rolling = {'rolling_median': rolling_median,
200+
'rolling_mean': rolling_mean,
201+
'rolling_min': rolling_min,
202+
'rolling_max': rolling_max,
203+
'rolling_var': rolling_var,
204+
'rolling_skew': rolling_skew,
205+
'rolling_kurt': rolling_kurt,
206+
'rolling_std': rolling_std}
207+
208+
@test_parallel(num_threads=2)
209+
def parallel_rolling():
210+
rolling[method](arr, win)
211+
self.parallel_rolling = parallel_rolling
196212

197213
def time_rolling(self, method):
198214
self.parallel_rolling()

asv_bench/benchmarks/inference.py

+72-76
Original file line number | Diff line number | Diff line change
@@ -1,77 +1,80 @@
1-
from .pandas_vb_common import *
2-
import pandas as pd
1+
import numpy as np
2+
import pandas.util.testing as tm
3+
from pandas import DataFrame, Series, to_numeric
34

5+
from .pandas_vb_common import numeric_dtypes, lib, setup # noqa
46

5-
class DtypeInfer(object):
6-
goal_time = 0.2
77

8+
class NumericInferOps(object):
89
# from GH 7332
10+
goal_time = 0.2
11+
params = numeric_dtypes
12+
param_names = ['dtype']
13+
14+
def setup(self, dtype):
15+
N = 5 * 10**5
16+
self.df = DataFrame({'A': np.arange(N).astype(dtype),
17+
'B': np.arange(N).astype(dtype)})
18+
19+
def time_add(self, dtype):
20+
self.df['A'] + self.df['B']
21+
22+
def time_subtract(self, dtype):
23+
self.df['A'] - self.df['B']
924

10-
def setup(self):
11-
self.N = 500000
12-
self.df_int64 = DataFrame(dict(A=np.arange(self.N, dtype='int64'),
13-
B=np.arange(self.N, dtype='int64')))
14-
self.df_int32 = DataFrame(dict(A=np.arange(self.N, dtype='int32'),
15-
B=np.arange(self.N, dtype='int32')))
16-
self.df_uint32 = DataFrame(dict(A=np.arange(self.N, dtype='uint32'),
17-
B=np.arange(self.N, dtype='uint32')))
18-
self.df_float64 = DataFrame(dict(A=np.arange(self.N, dtype='float64'),
19-
B=np.arange(self.N, dtype='float64')))
20-
self.df_float32 = DataFrame(dict(A=np.arange(self.N, dtype='float32'),
21-
B=np.arange(self.N, dtype='float32')))
22-
self.df_datetime64 = DataFrame(dict(A=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms'),
23-
B=pd.to_datetime(np.arange(self.N, dtype='int64'), unit='ms')))
24-
self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']),
25-
B=self.df_datetime64['B']))
25+
def time_multiply(self, dtype):
26+
self.df['A'] * self.df['B']
2627

27-
def time_int64(self):
28-
(self.df_int64['A'] + self.df_int64['B'])
28+
def time_divide(self, dtype):
29+
self.df['A'] / self.df['B']
2930

30-
def time_int32(self):
31-
(self.df_int32['A'] + self.df_int32['B'])
31+
def time_modulo(self, dtype):
32+
self.df['A'] % self.df['B']
3233

33-
def time_uint32(self):
34-
(self.df_uint32['A'] + self.df_uint32['B'])
3534

36-
def time_float64(self):
37-
(self.df_float64['A'] + self.df_float64['B'])
35+
class DateInferOps(object):
36+
# from GH 7332
37+
goal_time = 0.2
38+
39+
def setup_cache(self):
40+
N = 5 * 10**5
41+
df = DataFrame({'datetime64': np.arange(N).astype('datetime64[ms]')})
42+
df['timedelta'] = df['datetime64'] - df['datetime64']
43+
return df
3844

39-
def time_float32(self):
40-
(self.df_float32['A'] + self.df_float32['B'])
45+
def time_subtract_datetimes(self, df):
46+
df['datetime64'] - df['datetime64']
4147

42-
def time_datetime64(self):
43-
(self.df_datetime64['A'] - self.df_datetime64['B'])
48+
def time_timedelta_plus_datetime(self, df):
49+
df['timedelta'] + df['datetime64']
4450

45-
def time_timedelta64_1(self):
46-
(self.df_timedelta64['A'] + self.df_timedelta64['B'])
51+
def time_add_timedeltas(self, df):
52+
df['timedelta'] + df['timedelta']
4753

48-
def time_timedelta64_2(self):
49-
(self.df_timedelta64['A'] + self.df_timedelta64['A'])
5054

55+
class ToNumeric(object):
5156

52-
class to_numeric(object):
5357
goal_time = 0.2
58+
params = ['ignore', 'coerce']
59+
param_names = ['errors']
5460

55-
def setup(self):
56-
self.n = 10000
57-
self.float = Series(np.random.randn(self.n * 100))
61+
def setup(self, errors):
62+
N = 10000
63+
self.float = Series(np.random.randn(N))
5864
self.numstr = self.float.astype('str')
59-
self.str = Series(tm.makeStringIndex(self.n))
65+
self.str = Series(tm.makeStringIndex(N))
6066

61-
def time_from_float(self):
62-
pd.to_numeric(self.float)
67+
def time_from_float(self, errors):
68+
to_numeric(self.float, errors=errors)
6369

64-
def time_from_numeric_str(self):
65-
pd.to_numeric(self.numstr)
70+
def time_from_numeric_str(self, errors):
71+
to_numeric(self.numstr, errors=errors)
6672

67-
def time_from_str_ignore(self):
68-
pd.to_numeric(self.str, errors='ignore')
73+
def time_from_str(self, errors):
74+
to_numeric(self.str, errors=errors)
6975

70-
def time_from_str_coerce(self):
71-
pd.to_numeric(self.str, errors='coerce')
7276

73-
74-
class to_numeric_downcast(object):
77+
class ToNumericDowncast(object):
7578

7679
param_names = ['dtype', 'downcast']
7780
params = [['string-float', 'string-int', 'string-nint', 'datetime64',
@@ -81,37 +84,30 @@ class to_numeric_downcast(object):
8184
N = 500000
8285
N2 = int(N / 2)
8386

84-
data_dict = {
85-
'string-int': (['1'] * N2) + ([2] * N2),
86-
'string-nint': (['-1'] * N2) + ([2] * N2),
87-
'datetime64': np.repeat(np.array(['1970-01-01', '1970-01-02'],
88-
dtype='datetime64[D]'), N),
89-
'string-float': (['1.1'] * N2) + ([2] * N2),
90-
'int-list': ([1] * N2) + ([2] * N2),
91-
'int32': np.repeat(np.int32(1), N)
92-
}
87+
data_dict = {'string-int': ['1'] * N2 + [2] * N2,
88+
'string-nint': ['-1'] * N2 + [2] * N2,
89+
'datetime64': np.repeat(np.array(['1970-01-01', '1970-01-02'],
90+
dtype='datetime64[D]'), N),
91+
'string-float': ['1.1'] * N2 + [2] * N2,
92+
'int-list': [1] * N2 + [2] * N2,
93+
'int32': np.repeat(np.int32(1), N)}
9394

9495
def setup(self, dtype, downcast):
9596
self.data = self.data_dict[dtype]
9697

9798
def time_downcast(self, dtype, downcast):
98-
pd.to_numeric(self.data, downcast=downcast)
99+
to_numeric(self.data, downcast=downcast)
99100

100101

101102
class MaybeConvertNumeric(object):
102103

103-
def setup(self):
104-
n = 1000000
105-
arr = np.repeat([2**63], n)
106-
arr = arr + np.arange(n).astype('uint64')
107-
arr = np.array([arr[i] if i%2 == 0 else
108-
str(arr[i]) for i in range(n)],
109-
dtype=object)
110-
111-
arr[-1] = -1
112-
self.data = arr
113-
self.na_values = set()
114-
115-
def time_convert(self):
116-
lib.maybe_convert_numeric(self.data, self.na_values,
117-
coerce_numeric=False)
104+
def setup_cache(self):
105+
N = 10**6
106+
arr = np.repeat([2**63], N) + np.arange(N).astype('uint64')
107+
data = arr.astype(object)
108+
data[1::2] = arr[1::2].astype(str)
109+
data[-1] = -1
110+
return data
111+
112+
def time_convert(self, data):
113+
lib.maybe_convert_numeric(data, set(), coerce_numeric=False)

asv_bench/benchmarks/io/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)