Skip to content

Commit 90ec575

Browse files
committed
2 parents e1d3b66 + 81694dc commit 90ec575

File tree

292 files changed

+11795
-6533
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

292 files changed

+11795
-6533
lines changed

.travis.yml

+2
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ script:
121121
- ci/script_single.sh
122122
- ci/script_multi.sh
123123
- ci/lint.sh
124+
- echo "checking imports"
125+
- source activate pandas && python ci/check_imports.py
124126
- echo "script done"
125127

126128
after_success:

appveyor.yml

+7-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ install:
5959

6060
# install our build environment
6161
- cmd: conda config --set show_channel_urls true --set always_yes true --set changeps1 false
62-
- cmd: conda update -q conda
62+
# - cmd: conda update -q conda
6363
- cmd: conda config --set ssl_verify false
6464

6565
# add the pandas channel *before* defaults to have defaults take priority
@@ -74,12 +74,18 @@ install:
7474
# create our env
7575
- cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist
7676
- cmd: activate pandas
77+
- cmd: pip install moto
7778
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run
7879
- cmd: echo "installing requirements from %REQ%"
7980
- cmd: conda install -n pandas --file=%REQ%
8081
- cmd: conda list -n pandas
8182
- cmd: echo "installing requirements from %REQ% - done"
8283

84+
# add some pip only reqs to the env
85+
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip
86+
- cmd: echo "installing requirements from %REQ%"
87+
- cmd: pip install -Ur %REQ%
88+
8389
# build em using the local source checkout in the correct windows env
8490
- cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace'
8591

asv_bench/benchmarks/categoricals.py

+3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ def time_value_counts_dropna(self):
6767
def time_rendering(self):
6868
str(self.sel)
6969

70+
def time_set_categories(self):
71+
self.ts.cat.set_categories(self.ts.cat.categories[::2])
72+
7073

7174
class Categoricals3(object):
7275
goal_time = 0.2

asv_bench/benchmarks/index_object.py

+20
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,23 @@ def time_datetime_level_values_full(self):
199199

200200
def time_datetime_level_values_sliced(self):
201201
self.mi[:10].values
202+
203+
204+
class Range(object):
205+
goal_time = 0.2
206+
207+
def setup(self):
208+
self.idx_inc = RangeIndex(start=0, stop=10**7, step=3)
209+
self.idx_dec = RangeIndex(start=10**7, stop=-1, step=-3)
210+
211+
def time_max(self):
212+
self.idx_inc.max()
213+
214+
def time_max_trivial(self):
215+
self.idx_dec.max()
216+
217+
def time_min(self):
218+
self.idx_dec.min()
219+
220+
def time_min_trivial(self):
221+
self.idx_inc.min()

asv_bench/benchmarks/io_bench.py

+30
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import os
12
from .pandas_vb_common import *
23
from pandas import concat, Timestamp, compat
34
try:
@@ -192,3 +193,32 @@ def time_read_nrows(self, compression, engine):
192193
ext = ".bz2"
193194
pd.read_csv(self.big_fname + ext, nrows=10,
194195
compression=compression, engine=engine)
196+
197+
198+
class read_json_lines(object):
199+
goal_time = 0.2
200+
fname = "__test__.json"
201+
202+
def setup(self):
203+
self.N = 100000
204+
self.C = 5
205+
self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]))
206+
self.df.to_json(self.fname,orient="records",lines=True)
207+
208+
def teardown(self):
209+
try:
210+
os.remove(self.fname)
211+
except:
212+
pass
213+
214+
def time_read_json_lines(self):
215+
pd.read_json(self.fname, lines=True)
216+
217+
def time_read_json_lines_chunk(self):
218+
pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4))
219+
220+
def peakmem_read_json_lines(self):
221+
pd.read_json(self.fname, lines=True)
222+
223+
def peakmem_read_json_lines_chunk(self):
224+
pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4))

asv_bench/benchmarks/period.py

+88
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,35 @@
22
from pandas import Series, Period, PeriodIndex, date_range
33

44

5+
class PeriodProperties(object):
6+
def setup(self):
7+
self.per = Period('2012-06-01', freq='M')
8+
9+
def time_year(self):
10+
self.per.year
11+
12+
def time_month(self):
13+
self.per.month
14+
15+
def time_quarter(self):
16+
self.per.quarter
17+
18+
def time_day(self):
19+
self.per.day
20+
21+
def time_hour(self):
22+
self.per.hour
23+
24+
def time_minute(self):
    """Benchmark access to the ``minute`` field of the Period."""
    # Previously read ``second``, so the minute accessor was never timed.
    self.per.minute
26+
27+
def time_second(self):
28+
self.per.second
29+
30+
def time_leap_year(self):
    """Benchmark access to the ``is_leap_year`` flag of the Period."""
    # The attribute is spelled ``is_leap_year``; ``is_leapyear`` does not
    # exist and made this benchmark fail with AttributeError.
    self.per.is_leap_year
32+
33+
534
class Constructor(object):
635
goal_time = 0.2
736

@@ -49,6 +78,65 @@ def time_value_counts_pindex(self):
4978
self.i.value_counts()
5079

5180

81+
class Properties(object):
82+
def setup(self):
83+
self.per = Period('2017-09-06 08:28', freq='min')
84+
85+
def time_year(self):
86+
self.per.year
87+
88+
def time_month(self):
89+
self.per.month
90+
91+
def time_day(self):
92+
self.per.day
93+
94+
def time_hour(self):
95+
self.per.hour
96+
97+
def time_minute(self):
98+
self.per.minute
99+
100+
def time_second(self):
101+
self.per.second
102+
103+
def time_is_leap_year(self):
104+
self.per.is_leap_year
105+
106+
def time_quarter(self):
107+
self.per.quarter
108+
109+
def time_qyear(self):
110+
self.per.qyear
111+
112+
def time_week(self):
113+
self.per.week
114+
115+
def time_daysinmonth(self):
116+
self.per.daysinmonth
117+
118+
def time_dayofweek(self):
119+
self.per.dayofweek
120+
121+
def time_dayofyear(self):
122+
self.per.dayofyear
123+
124+
def time_start_time(self):
125+
self.per.start_time
126+
127+
def time_end_time(self):
128+
self.per.end_time
129+
130+
def time_to_timestamp(self):
    """Benchmark ``to_timestamp()`` on the Period built in ``setup``."""
    # ``self`` was missing from the signature, so the benchmark could not
    # be invoked as a method.
    self.per.to_timestamp()
132+
133+
def time_now(self):
    """Benchmark ``now()`` called through the Period built in ``setup``."""
    # ``self`` was missing from the signature, so the benchmark could not
    # be invoked as a method.
    self.per.now()
135+
136+
def time_asfreq(self):
    """Benchmark ``asfreq('A')`` on the Period built in ``setup``."""
    # ``self`` was missing from the signature, so the benchmark could not
    # be invoked as a method.
    self.per.asfreq('A')
138+
139+
52140
class period_standard_indexing(object):
53141
goal_time = 0.2
54142

asv_bench/benchmarks/sparse.py

+66-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
from itertools import repeat
1+
import itertools
22

33
from .pandas_vb_common import *
44
import scipy.sparse
5-
from pandas import SparseSeries, SparseDataFrame
5+
from pandas import SparseSeries, SparseDataFrame, SparseArray
66

77

88
class sparse_series_to_frame(object):
@@ -23,6 +23,69 @@ def time_sparse_series_to_frame(self):
2323
SparseDataFrame(self.series)
2424

2525

26+
class sparse_array_constructor(object):
27+
goal_time = 0.2
28+
29+
def setup(self):
30+
np.random.seed(1)
31+
self.int64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=0, dtype=np.int64)
32+
self.int64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=0, dtype=np.int64)
33+
34+
self.float64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=np.nan, dtype=np.float64)
35+
self.float64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=np.nan, dtype=np.float64)
36+
37+
self.object_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=np.nan)
38+
self.object_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=np.nan)
39+
40+
self.object_non_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=0)
41+
self.object_non_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=0)
42+
43+
def make_numeric_array(self, length, dense_size, fill_value, dtype):
44+
arr = np.array([fill_value] * length, dtype=dtype)
45+
indexer = np.unique(np.random.randint(0, length, dense_size))
46+
arr[indexer] = np.random.randint(0, 100, len(indexer))
47+
return (arr, fill_value, dtype)
48+
49+
def make_object_array(self, length, dense_size, fill_value):
    """Build an object-dtype array that is mostly ``fill_value``.

    Parameters
    ----------
    length : int
        Total number of elements in the returned array.
    dense_size : int
        Number of random positions drawn for non-fill values; duplicate
        draws are collapsed by ``np.unique``, so the number of positions
        actually overwritten may be smaller.
    fill_value : object
        Value placed at every position that is not overwritten.

    Returns
    -------
    tuple
        ``(arr, fill_value, object)``, unpacked by the timing methods as
        the array, fill value and dtype passed to ``SparseArray``.
    """
    # Use the builtin ``object`` rather than the ``np.object`` alias, which
    # was deprecated in NumPy 1.20 and removed in 1.24.
    elems = np.array(['a', 0.0, False, 1, 2], dtype=object)
    arr = np.array([fill_value] * length, dtype=object)
    indexer = np.unique(np.random.randint(0, length, dense_size))
    arr[indexer] = np.random.choice(elems, len(indexer))
    return (arr, fill_value, object)
55+
56+
def time_sparse_array_constructor_int64_10percent(self):
57+
arr, fill_value, dtype = self.int64_10percent
58+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
59+
60+
def time_sparse_array_constructor_int64_1percent(self):
61+
arr, fill_value, dtype = self.int64_1percent
62+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
63+
64+
def time_sparse_array_constructor_float64_10percent(self):
65+
arr, fill_value, dtype = self.float64_10percent
66+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
67+
68+
def time_sparse_array_constructor_float64_1percent(self):
69+
arr, fill_value, dtype = self.float64_1percent
70+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
71+
72+
def time_sparse_array_constructor_object_nan_fill_value_10percent(self):
73+
arr, fill_value, dtype = self.object_nan_fill_value_10percent
74+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
75+
76+
def time_sparse_array_constructor_object_nan_fill_value_1percent(self):
77+
arr, fill_value, dtype = self.object_nan_fill_value_1percent
78+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
79+
80+
def time_sparse_array_constructor_object_non_nan_fill_value_10percent(self):
81+
arr, fill_value, dtype = self.object_non_nan_fill_value_10percent
82+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
83+
84+
def time_sparse_array_constructor_object_non_nan_fill_value_1percent(self):
85+
arr, fill_value, dtype = self.object_non_nan_fill_value_1percent
86+
SparseArray(arr, fill_value=fill_value, dtype=dtype)
87+
88+
2689
class sparse_frame_constructor(object):
2790
goal_time = 0.2
2891

@@ -33,7 +96,7 @@ def time_sparse_from_scipy(self):
3396
SparseDataFrame(scipy.sparse.rand(1000, 1000, 0.005))
3497

3598
def time_sparse_from_dict(self):
36-
SparseDataFrame(dict(zip(range(1000), repeat([0]))))
99+
SparseDataFrame(dict(zip(range(1000), itertools.repeat([0]))))
37100

38101

39102
class sparse_series_from_coo(object):

asv_bench/benchmarks/timeseries.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def setup(self):
5656
self.no_freq = self.rng7[:50000].append(self.rng7[50002:])
5757
self.d_freq = self.rng7[:50000].append(self.rng7[50000:])
5858

59-
self.rng8 = date_range(start='1/1/1700', freq='B', periods=100000)
59+
self.rng8 = date_range(start='1/1/1700', freq='B', periods=75000)
6060
self.b_freq = self.rng8[:50000].append(self.rng8[50000:])
6161

6262
def time_add_timedelta(self):

asv_bench/benchmarks/timestamp.py

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
from .pandas_vb_common import *
2+
from pandas import to_timedelta, Timestamp
3+
import pytz
4+
import datetime
5+
6+
7+
class TimestampProperties(object):
8+
goal_time = 0.2
9+
10+
def setup(self):
11+
self.ts = Timestamp('2017-08-25 08:16:14')
12+
13+
def time_tz(self):
14+
self.ts.tz
15+
16+
def time_offset(self):
17+
self.ts.offset
18+
19+
def time_dayofweek(self):
20+
self.ts.dayofweek
21+
22+
def time_weekday_name(self):
23+
self.ts.weekday_name
24+
25+
def time_dayofyear(self):
26+
self.ts.dayofyear
27+
28+
def time_week(self):
29+
self.ts.week
30+
31+
def time_quarter(self):
32+
self.ts.quarter
33+
34+
def time_days_in_month(self):
35+
self.ts.days_in_month
36+
37+
def time_freqstr(self):
38+
self.ts.freqstr
39+
40+
def time_is_month_start(self):
41+
self.ts.is_month_start
42+
43+
def time_is_month_end(self):
44+
self.ts.is_month_end
45+
46+
def time_is_quarter_start(self):
47+
self.ts.is_quarter_start
48+
49+
def time_is_quarter_end(self):
50+
self.ts.is_quarter_end
51+
52+
def time_is_year_start(self):
    """Benchmark access to the ``is_year_start`` flag of the Timestamp."""
    # Previously read ``is_quarter_end`` (copy-paste slip), so this
    # property was never actually timed.
    self.ts.is_year_start
54+
55+
def time_is_year_end(self):
    """Benchmark access to the ``is_year_end`` flag of the Timestamp."""
    # Previously read ``is_quarter_end`` (copy-paste slip), so this
    # property was never actually timed.
    self.ts.is_year_end
57+
58+
def time_is_leap_year(self):
    """Benchmark access to the ``is_leap_year`` flag of the Timestamp."""
    # Previously read ``is_quarter_end`` (copy-paste slip), so this
    # property was never actually timed.
    self.ts.is_leap_year
60+
61+
def time_microsecond(self):
62+
self.ts.microsecond
63+
64+
65+
class TimestampOps(object):
66+
goal_time = 0.2
67+
68+
def setup(self):
69+
self.ts = Timestamp('2017-08-25 08:16:14')
70+
self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern')
71+
72+
dt = datetime.datetime(2016, 3, 27, 1)
73+
self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo
74+
self.ts2 = Timestamp(dt)
75+
76+
def time_replace_tz(self):
77+
self.ts.replace(tzinfo=pytz.timezone('US/Eastern'))
78+
79+
def time_replace_across_dst(self):
80+
self.ts2.replace(tzinfo=self.tzinfo)
81+
82+
def time_replace_None(self):
83+
self.ts_tz.replace(tzinfo=None)
84+
85+
def time_to_pydatetime(self):
86+
self.ts.to_pydatetime()
87+
88+
def time_to_pydatetime_tz(self):
89+
self.ts_tz.to_pydatetime()

0 commit comments

Comments
 (0)