Skip to content

Commit 91d989b

Browse files
committed
Merge branch 'master' of github.com:pandas-dev/pandas into fix_to_numeric_on_decimal_fields
2 parents d7972d7 + 8e630b6 commit 91d989b

File tree

117 files changed

+10643
-8345
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

117 files changed

+10643
-8345
lines changed

.travis.yml

Lines changed: 58 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -66,19 +66,6 @@ matrix:
6666
apt:
6767
packages:
6868
- python-gtk2
69-
- python: 3.4
70-
env:
71-
- PYTHON_VERSION=3.4
72-
- JOB_NAME: "34_nslow"
73-
- NOSE_ARGS="not slow and not disabled"
74-
- FULL_DEPS=true
75-
- CLIPBOARD=xsel
76-
- CACHE_NAME="34_nslow"
77-
- USE_CACHE=true
78-
addons:
79-
apt:
80-
packages:
81-
- xsel
8269
- python: 3.5
8370
env:
8471
- PYTHON_VERSION=3.5
@@ -93,6 +80,33 @@ matrix:
9380
apt:
9481
packages:
9582
- xsel
83+
- python: 3.6-dev
84+
env:
85+
- PYTHON_VERSION=3.6
86+
- JOB_NAME: "36_dev"
87+
- JOB_TAG=_DEV
88+
- NOSE_ARGS="not slow and not network and not disabled"
89+
- PANDAS_TESTING_MODE="deprecate"
90+
addons:
91+
apt:
92+
packages:
93+
- libatlas-base-dev
94+
- gfortran
95+
# In allow_failures
96+
- python: 2.7
97+
env:
98+
- PYTHON_VERSION=2.7
99+
- JOB_NAME: "27_nslow_nnet_COMPAT"
100+
- NOSE_ARGS="not slow and not network and not disabled"
101+
- LOCALE_OVERRIDE="it_IT.UTF-8"
102+
- INSTALL_TEST=true
103+
- JOB_TAG=_COMPAT
104+
- CACHE_NAME="27_nslow_nnet_COMPAT"
105+
- USE_CACHE=true
106+
addons:
107+
apt:
108+
packages:
109+
- language-pack-it
96110
# In allow_failures
97111
- python: 2.7
98112
env:
@@ -103,45 +117,46 @@ matrix:
103117
- FULL_DEPS=true
104118
- CACHE_NAME="27_slow"
105119
- USE_CACHE=true
120+
# In allow_failures
121+
- python: 2.7
122+
env:
123+
- PYTHON_VERSION=2.7
124+
- JOB_NAME: "27_build_test_conda"
125+
- JOB_TAG=_BUILD_TEST
126+
- NOSE_ARGS="not slow and not disabled"
127+
- FULL_DEPS=true
128+
- BUILD_TEST=true
129+
- CACHE_NAME="27_build_test_conda"
130+
- USE_CACHE=true
106131
# In allow_failures
107132
- python: 3.4
108133
env:
109134
- PYTHON_VERSION=3.4
110-
- JOB_NAME: "34_slow"
111-
- JOB_TAG=_SLOW
112-
- NOSE_ARGS="slow and not network and not disabled"
135+
- JOB_NAME: "34_nslow"
136+
- NOSE_ARGS="not slow and not disabled"
113137
- FULL_DEPS=true
114138
- CLIPBOARD=xsel
115-
- CACHE_NAME="34_slow"
139+
- CACHE_NAME="34_nslow"
116140
- USE_CACHE=true
117141
addons:
118142
apt:
119143
packages:
120144
- xsel
121145
# In allow_failures
122-
- python: 2.7
146+
- python: 3.4
123147
env:
124-
- PYTHON_VERSION=2.7
125-
- JOB_NAME: "27_build_test_conda"
126-
- JOB_TAG=_BUILD_TEST
127-
- NOSE_ARGS="not slow and not disabled"
148+
- PYTHON_VERSION=3.4
149+
- JOB_NAME: "34_slow"
150+
- JOB_TAG=_SLOW
151+
- NOSE_ARGS="slow and not network and not disabled"
128152
- FULL_DEPS=true
129-
- BUILD_TEST=true
130-
- CACHE_NAME="27_build_test_conda"
153+
- CLIPBOARD=xsel
154+
- CACHE_NAME="34_slow"
131155
- USE_CACHE=true
132-
# In allow_failures
133-
- python: 3.6-dev
134-
env:
135-
- PYTHON_VERSION=3.6
136-
- JOB_NAME: "36_dev"
137-
- JOB_TAG=_DEV
138-
- NOSE_ARGS="not slow and not network and not disabled"
139-
- PANDAS_TESTING_MODE="deprecate"
140156
addons:
141157
apt:
142158
packages:
143-
- libatlas-base-dev
144-
- gfortran
159+
- xsel
145160
# In allow_failures
146161
- python: 3.5
147162
env:
@@ -157,21 +172,6 @@ matrix:
157172
packages:
158173
- libatlas-base-dev
159174
- gfortran
160-
# In allow_failures
161-
- python: 2.7
162-
env:
163-
- PYTHON_VERSION=2.7
164-
- JOB_NAME: "27_nslow_nnet_COMPAT"
165-
- NOSE_ARGS="not slow and not network and not disabled"
166-
- LOCALE_OVERRIDE="it_IT.UTF-8"
167-
- INSTALL_TEST=true
168-
- JOB_TAG=_COMPAT
169-
- CACHE_NAME="27_nslow_nnet_COMPAT"
170-
- USE_CACHE=true
171-
addons:
172-
apt:
173-
packages:
174-
- language-pack-it
175175
# In allow_failures
176176
- python: 3.5
177177
env:
@@ -226,18 +226,19 @@ matrix:
226226
- BUILD_TEST=true
227227
- CACHE_NAME="27_build_test_conda"
228228
- USE_CACHE=true
229-
- python: 3.6-dev
229+
- python: 3.4
230230
env:
231-
- PYTHON_VERSION=3.6
232-
- JOB_NAME: "36_dev"
233-
- JOB_TAG=_DEV
234-
- NOSE_ARGS="not slow and not network and not disabled"
235-
- PANDAS_TESTING_MODE="deprecate"
231+
- PYTHON_VERSION=3.4
232+
- JOB_NAME: "34_nslow"
233+
- NOSE_ARGS="not slow and not disabled"
234+
- FULL_DEPS=true
235+
- CLIPBOARD=xsel
236+
- CACHE_NAME="34_nslow"
237+
- USE_CACHE=true
236238
addons:
237239
apt:
238240
packages:
239-
- libatlas-base-dev
240-
- gfortran
241+
- xsel
241242
- python: 3.5
242243
env:
243244
- PYTHON_VERSION=3.5

asv_bench/benchmarks/algorithms.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ class Algorithms(object):
88

99
def setup(self):
1010
N = 100000
11+
np.random.seed(1234)
1112

1213
self.int_unique = pd.Int64Index(np.arange(N * 5))
1314
# cache is_unique
@@ -17,17 +18,24 @@ def setup(self):
1718
self.float = pd.Float64Index(np.random.randn(N).repeat(5))
1819

1920
# Convenience naming.
20-
self.checked_add = pd.core.nanops._checked_add_with_arr
21+
self.checked_add = pd.core.algorithms.checked_add_with_arr
2122

2223
self.arr = np.arange(1000000)
2324
self.arrpos = np.arange(1000000)
2425
self.arrneg = np.arange(-1000000, 0)
2526
self.arrmixed = np.array([1, -1]).repeat(500000)
27+
self.strings = tm.makeStringIndex(100000)
28+
29+
self.arr_nan = np.random.choice([True, False], size=1000000)
30+
self.arrmixed_nan = np.random.choice([True, False], size=1000000)
2631

2732
# match
2833
self.uniques = tm.makeStringIndex(1000).values
2934
self.all = self.uniques.repeat(10)
3035

36+
def time_factorize_string(self):
37+
self.strings.factorize()
38+
3139
def time_factorize_int(self):
3240
self.int.factorize()
3341

@@ -64,6 +72,16 @@ def time_add_overflow_neg_arr(self):
6472
def time_add_overflow_mixed_arr(self):
6573
self.checked_add(self.arr, self.arrmixed)
6674

75+
def time_add_overflow_first_arg_nan(self):
76+
self.checked_add(self.arr, self.arrmixed, arr_mask=self.arr_nan)
77+
78+
def time_add_overflow_second_arg_nan(self):
79+
self.checked_add(self.arr, self.arrmixed, b_mask=self.arrmixed_nan)
80+
81+
def time_add_overflow_both_arg_nan(self):
82+
self.checked_add(self.arr, self.arrmixed, arr_mask=self.arr_nan,
83+
b_mask=self.arrmixed_nan)
84+
6785

6886
class Hashing(object):
6987
goal_time = 0.2

asv_bench/benchmarks/gil.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,3 +379,38 @@ def pg_read_csv_datetime(self):
379379

380380
def time_read_csv_datetime(self):
381381
self.pg_read_csv_datetime()
382+
383+
384+
class nogil_factorize(object):
385+
number = 1
386+
repeat = 5
387+
388+
def setup(self):
389+
if (not have_real_test_parallel):
390+
raise NotImplementedError
391+
392+
np.random.seed(1234)
393+
self.strings = tm.makeStringIndex(100000)
394+
395+
def factorize_strings(self):
396+
pd.factorize(self.strings)
397+
398+
@test_parallel(num_threads=4)
399+
def _pg_factorize_strings_4(self):
400+
self.factorize_strings()
401+
402+
def time_factorize_strings_4(self):
403+
for i in range(2):
404+
self._pg_factorize_strings_4()
405+
406+
@test_parallel(num_threads=2)
407+
def _pg_factorize_strings_2(self):
408+
self.factorize_strings()
409+
410+
def time_factorize_strings_2(self):
411+
for i in range(4):
412+
self._pg_factorize_strings_2()
413+
414+
def time_factorize_strings(self):
415+
for i in range(8):
416+
self.factorize_strings()

asv_bench/benchmarks/io_bench.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def setup(self, compression, engine):
153153
# The Python 2 C parser can't read bz2 from open files.
154154
raise NotImplementedError
155155
try:
156-
import boto
156+
import s3fs
157157
except ImportError:
158158
# Skip these benchmarks if `s3fs` is not installed.
159159
raise NotImplementedError

asv_bench/benchmarks/join_merge.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,12 +302,19 @@ def setup(self):
302302
self.df1 = self.df1.sort_values('time')
303303
self.df2 = self.df2.sort_values('time')
304304

305+
self.df1['time32'] = np.int32(self.df1.time)
306+
self.df2['time32'] = np.int32(self.df2.time)
307+
305308
self.df1a = self.df1[['time', 'value1']]
306309
self.df2a = self.df2[['time', 'value2']]
307310
self.df1b = self.df1[['time', 'key', 'value1']]
308311
self.df2b = self.df2[['time', 'key', 'value2']]
309312
self.df1c = self.df1[['time', 'key2', 'value1']]
310313
self.df2c = self.df2[['time', 'key2', 'value2']]
314+
self.df1d = self.df1[['time32', 'value1']]
315+
self.df2d = self.df2[['time32', 'value2']]
316+
self.df1e = self.df1[['time', 'key', 'key2', 'value1']]
317+
self.df2e = self.df2[['time', 'key', 'key2', 'value2']]
311318

312319
def time_noby(self):
313320
merge_asof(self.df1a, self.df2a, on='time')
@@ -318,6 +325,12 @@ def time_by_object(self):
318325
def time_by_int(self):
319326
merge_asof(self.df1c, self.df2c, on='time', by='key2')
320327

328+
def time_on_int32(self):
329+
merge_asof(self.df1d, self.df2d, on='time32')
330+
331+
def time_multiby(self):
332+
merge_asof(self.df1e, self.df2e, on='time', by=['key', 'key2'])
333+
321334

322335
#----------------------------------------------------------------------
323336
# data alignment

asv_bench/benchmarks/reshape.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .pandas_vb_common import *
2-
from pandas.core.reshape import melt
2+
from pandas.core.reshape import melt, wide_to_long
33

44

55
class melt_dataframe(object):
@@ -74,3 +74,25 @@ def setup(self):
7474

7575
def time_unstack_sparse_keyspace(self):
7676
self.idf.unstack()
77+
78+
79+
class wide_to_long_big(object):
80+
goal_time = 0.2
81+
82+
def setup(self):
83+
vars = 'ABCD'
84+
nyrs = 20
85+
nidvars = 20
86+
N = 5000
87+
yrvars = []
88+
for var in vars:
89+
for yr in range(1, nyrs + 1):
90+
yrvars.append(var + str(yr))
91+
92+
self.df = pd.DataFrame(np.random.randn(N, nidvars + len(yrvars)),
93+
columns=list(range(nidvars)) + yrvars)
94+
self.vars = vars
95+
96+
def time_wide_to_long_big(self):
97+
self.df['id'] = self.df.index
98+
wide_to_long(self.df, list(self.vars), i='id', j='year')

asv_bench/benchmarks/series_methods.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,28 @@ def setup(self):
88
self.dr = pd.date_range(
99
start=datetime(2015,10,26),
1010
end=datetime(2016,1,1),
11-
freq='10s'
12-
) # ~500k long
11+
freq='50s'
12+
) # ~100k long
1313

1414
def time_series_constructor_no_data_datetime_index(self):
1515
Series(data=None, index=self.dr)
1616

1717

18+
class series_constructor_dict_data_datetime_index(object):
19+
goal_time = 0.2
20+
21+
def setup(self):
22+
self.dr = pd.date_range(
23+
start=datetime(2015, 10, 26),
24+
end=datetime(2016, 1, 1),
25+
freq='50s'
26+
) # ~100k long
27+
self.data = {d: v for d, v in zip(self.dr, range(len(self.dr)))}
28+
29+
def time_series_constructor_no_data_datetime_index(self):
30+
Series(data=self.data, index=self.dr)
31+
32+
1833
class series_isin_int64(object):
1934
goal_time = 0.2
2035

0 commit comments

Comments
 (0)