Skip to content

Commit 7481c30

Browse files
committed
Merge commit 'v0.19.0-14-ga40e185' into debian
* commit 'v0.19.0-14-ga40e185': (37 commits) BUG: Bug in localizing an ambiguous timezone when a boolean is passed Convert readthedocs links for their .org -> .io migration for hosted projects (pandas-dev#14406) DOC: formatting in basics.rst BLD/CI: cython cache pxd files (pandas-dev#14363) BUG: set_levels set illegal levels. (pandas-dev#14236) DOC: add whitespace to v0.19.1 bug fix section change impl details slightly for pandas-dev#14292 BUG: Fix concat key name DOC: add 0.19.1 whatsnew file (pandas-dev#14366) DOC: to_csv warns regarding quoting behaviour for floats pandas-dev#14195 (pandas-dev#14228) DOC: fix formatting issue with msgpack table TST: pandas-dev#14345 fixes TestDatetimeIndexOps test_nat AssertionErrors on 32-bit docs: Remove old warning from dsintro.rst (pandas-dev#14365) DOC: minor v0.19.0 whatsnew corrections RLS: v0.19.0 DOC: update release notes DOC: Latest fixes for whatsnew file to_latex encoding follows the documentation (py2 ascii, py3 utf8) (pandas-dev#14329) DOC: fix some sphinx build issues (pandas-dev#14332) TST: fix period tests for numpy 1.9.3 (GH14183) (pandas-dev#14331) ...
2 parents 1e607c8 + a40e185 commit 7481c30

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

65 files changed

+1356
-816
lines changed

.github/CONTRIBUTING.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ Please try to maintain backward compatibility. *pandas* has lots of users with l
278278

279279
Adding tests is one of the most common requests after code is pushed to *pandas*. Therefore, it is worth getting in the habit of writing tests ahead of time so this is never an issue.
280280

281-
Like many packages, *pandas* uses the [Nose testing system](http://nose.readthedocs.org/en/latest/index.html) and the convenient extensions in [numpy.testing](http://docs.scipy.org/doc/numpy/reference/routines.testing.html).
281+
Like many packages, *pandas* uses the [Nose testing system](https://nose.readthedocs.io/en/latest/index.html) and the convenient extensions in [numpy.testing](http://docs.scipy.org/doc/numpy/reference/routines.testing.html).
282282

283283
#### Writing tests
284284

@@ -323,7 +323,7 @@ Performance matters and it is worth considering whether your code has introduced
323323
>
324324
> The asv benchmark suite was translated from the previous framework, vbench, so many stylistic issues are likely a result of automated transformation of the code.
325325
326-
To use asv you will need either `conda` or `virtualenv`. For more details please check the [asv installation webpage](http://asv.readthedocs.org/en/latest/installing.html).
326+
To use asv you will need either `conda` or `virtualenv`. For more details please check the [asv installation webpage](https://asv.readthedocs.io/en/latest/installing.html).
327327

328328
To install asv:
329329

@@ -360,7 +360,7 @@ This command is equivalent to:
360360

361361
This will launch every test only once, display stderr from the benchmarks, and use your local `python` that comes from your `$PATH`.
362362

363-
Information on how to write a benchmark can be found in the [asv documentation](http://asv.readthedocs.org/en/latest/writing_benchmarks.html).
363+
Information on how to write a benchmark can be found in the [asv documentation](https://asv.readthedocs.io/en/latest/writing_benchmarks.html).
364364

365365
#### Running the vbench performance test suite (phasing out)
366366

.github/ISSUE_TEMPLATE.md

+11-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
1-
#### Code Sample, a copy-pastable example if possible
1+
#### A small, complete example of the issue
2+
3+
```python
4+
# Your code here
5+
6+
```
27

38
#### Expected Output
49

5-
#### output of ``pd.show_versions()``
10+
#### Output of ``pd.show_versions()``
11+
12+
<details>
13+
# Paste the output here
614

15+
</details>

asv_bench/benchmarks/gil.py

+54-119
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def wrapper(fname):
2222
return wrapper
2323

2424

25-
class nogil_groupby_count_2(object):
25+
class nogil_groupby_base(object):
2626
goal_time = 0.2
2727

2828
def setup(self):
@@ -33,6 +33,9 @@ def setup(self):
3333
if (not have_real_test_parallel):
3434
raise NotImplementedError
3535

36+
37+
class nogil_groupby_count_2(nogil_groupby_base):
38+
3639
def time_nogil_groupby_count_2(self):
3740
self.pg2()
3841

@@ -41,16 +44,7 @@ def pg2(self):
4144
self.df.groupby('key')['data'].count()
4245

4346

44-
class nogil_groupby_last_2(object):
45-
goal_time = 0.2
46-
47-
def setup(self):
48-
self.N = 1000000
49-
self.ngroups = 1000
50-
np.random.seed(1234)
51-
self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
52-
if (not have_real_test_parallel):
53-
raise NotImplementedError
47+
class nogil_groupby_last_2(nogil_groupby_base):
5448

5549
def time_nogil_groupby_last_2(self):
5650
self.pg2()
@@ -60,16 +54,7 @@ def pg2(self):
6054
self.df.groupby('key')['data'].last()
6155

6256

63-
class nogil_groupby_max_2(object):
64-
goal_time = 0.2
65-
66-
def setup(self):
67-
self.N = 1000000
68-
self.ngroups = 1000
69-
np.random.seed(1234)
70-
self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
71-
if (not have_real_test_parallel):
72-
raise NotImplementedError
57+
class nogil_groupby_max_2(nogil_groupby_base):
7358

7459
def time_nogil_groupby_max_2(self):
7560
self.pg2()
@@ -79,16 +64,7 @@ def pg2(self):
7964
self.df.groupby('key')['data'].max()
8065

8166

82-
class nogil_groupby_mean_2(object):
83-
goal_time = 0.2
84-
85-
def setup(self):
86-
self.N = 1000000
87-
self.ngroups = 1000
88-
np.random.seed(1234)
89-
self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
90-
if (not have_real_test_parallel):
91-
raise NotImplementedError
67+
class nogil_groupby_mean_2(nogil_groupby_base):
9268

9369
def time_nogil_groupby_mean_2(self):
9470
self.pg2()
@@ -98,16 +74,7 @@ def pg2(self):
9874
self.df.groupby('key')['data'].mean()
9975

10076

101-
class nogil_groupby_min_2(object):
102-
goal_time = 0.2
103-
104-
def setup(self):
105-
self.N = 1000000
106-
self.ngroups = 1000
107-
np.random.seed(1234)
108-
self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
109-
if (not have_real_test_parallel):
110-
raise NotImplementedError
77+
class nogil_groupby_min_2(nogil_groupby_base):
11178

11279
def time_nogil_groupby_min_2(self):
11380
self.pg2()
@@ -117,16 +84,7 @@ def pg2(self):
11784
self.df.groupby('key')['data'].min()
11885

11986

120-
class nogil_groupby_prod_2(object):
121-
goal_time = 0.2
122-
123-
def setup(self):
124-
self.N = 1000000
125-
self.ngroups = 1000
126-
np.random.seed(1234)
127-
self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
128-
if (not have_real_test_parallel):
129-
raise NotImplementedError
87+
class nogil_groupby_prod_2(nogil_groupby_base):
13088

13189
def time_nogil_groupby_prod_2(self):
13290
self.pg2()
@@ -136,16 +94,7 @@ def pg2(self):
13694
self.df.groupby('key')['data'].prod()
13795

13896

139-
class nogil_groupby_sum_2(object):
140-
goal_time = 0.2
141-
142-
def setup(self):
143-
self.N = 1000000
144-
self.ngroups = 1000
145-
np.random.seed(1234)
146-
self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
147-
if (not have_real_test_parallel):
148-
raise NotImplementedError
97+
class nogil_groupby_sum_2(nogil_groupby_base):
14998

15099
def time_nogil_groupby_sum_2(self):
151100
self.pg2()
@@ -155,107 +104,93 @@ def pg2(self):
155104
self.df.groupby('key')['data'].sum()
156105

157106

158-
class nogil_groupby_sum_4(object):
159-
goal_time = 0.2
160-
161-
def setup(self):
162-
self.N = 1000000
163-
self.ngroups = 1000
164-
np.random.seed(1234)
165-
self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
166-
if (not have_real_test_parallel):
167-
raise NotImplementedError
107+
class nogil_groupby_sum_4(nogil_groupby_base):
168108

169109
def time_nogil_groupby_sum_4(self):
170110
self.pg4()
171111

172112
def f(self):
173113
self.df.groupby('key')['data'].sum()
174114

175-
def g2(self):
176-
for i in range(2):
177-
self.f()
178-
179115
def g4(self):
180116
for i in range(4):
181117
self.f()
182118

183-
def g8(self):
184-
for i in range(8):
185-
self.f()
186-
187-
@test_parallel(num_threads=2)
188-
def pg2(self):
189-
self.f()
190-
191119
@test_parallel(num_threads=4)
192120
def pg4(self):
193121
self.f()
194122

195-
@test_parallel(num_threads=8)
196-
def pg8(self):
197-
self.f()
198123

199-
200-
class nogil_groupby_sum_8(object):
201-
goal_time = 0.2
202-
203-
def setup(self):
204-
self.N = 1000000
205-
self.ngroups = 1000
206-
np.random.seed(1234)
207-
self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
208-
if (not have_real_test_parallel):
209-
raise NotImplementedError
124+
class nogil_groupby_sum_8(nogil_groupby_base):
210125

211126
def time_nogil_groupby_sum_8(self):
212127
self.pg8()
213128

214129
def f(self):
215130
self.df.groupby('key')['data'].sum()
216131

217-
def g2(self):
218-
for i in range(2):
219-
self.f()
220-
221-
def g4(self):
222-
for i in range(4):
223-
self.f()
224-
225132
def g8(self):
226133
for i in range(8):
227134
self.f()
228135

229-
@test_parallel(num_threads=2)
230-
def pg2(self):
231-
self.f()
232-
233-
@test_parallel(num_threads=4)
234-
def pg4(self):
235-
self.f()
236-
237136
@test_parallel(num_threads=8)
238137
def pg8(self):
239138
self.f()
240139

241140

242-
class nogil_groupby_var_2(object):
141+
class nogil_groupby_var_2(nogil_groupby_base):
142+
143+
def time_nogil_groupby_var_2(self):
144+
self.pg2()
145+
146+
@test_parallel(num_threads=2)
147+
def pg2(self):
148+
self.df.groupby('key')['data'].var()
149+
150+
151+
class nogil_groupby_groups(object):
243152
goal_time = 0.2
244153

245154
def setup(self):
246-
self.N = 1000000
247-
self.ngroups = 1000
248155
np.random.seed(1234)
249-
self.df = DataFrame({'key': np.random.randint(0, self.ngroups, size=self.N), 'data': np.random.randn(self.N), })
156+
self.size = 2**22
157+
self.ngroups = 100
158+
self.data = Series(np.random.randint(0, self.ngroups, size=self.size))
250159
if (not have_real_test_parallel):
251160
raise NotImplementedError
252161

253-
def time_nogil_groupby_var_2(self):
162+
def f(self):
163+
self.data.groupby(self.data).groups
164+
165+
166+
class nogil_groupby_groups_2(nogil_groupby_groups):
167+
168+
def time_nogil_groupby_groups(self):
254169
self.pg2()
255170

256171
@test_parallel(num_threads=2)
257172
def pg2(self):
258-
self.df.groupby('key')['data'].var()
173+
self.f()
174+
175+
176+
class nogil_groupby_groups_4(nogil_groupby_groups):
177+
178+
def time_nogil_groupby_groups(self):
179+
self.pg4()
180+
181+
@test_parallel(num_threads=4)
182+
def pg4(self):
183+
self.f()
184+
185+
186+
class nogil_groupby_groups_8(nogil_groupby_groups):
187+
188+
def time_nogil_groupby_groups(self):
189+
self.pg8()
190+
191+
@test_parallel(num_threads=8)
192+
def pg8(self):
193+
self.f()
259194

260195

261196
class nogil_take1d_float64(object):

asv_bench/benchmarks/groupby.py

+26
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,32 @@ def time_groupby_apply_dict_return(self):
3232
self.data.groupby(self.labels).apply(self.f)
3333

3434

35+
#----------------------------------------------------------------------
36+
# groups
37+
38+
class groupby_groups(object):
39+
goal_time = 0.1
40+
41+
def setup(self):
42+
size = 2**22
43+
self.data = Series(np.random.randint(0, 100, size=size))
44+
self.data2 = Series(np.random.randint(0, 10000, size=size))
45+
self.data3 = Series(tm.makeStringIndex(100).take(np.random.randint(0, 100, size=size)))
46+
self.data4 = Series(tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=size)))
47+
48+
def time_groupby_groups_int64_small(self):
49+
self.data.groupby(self.data).groups
50+
51+
def time_groupby_groups_int64_large(self):
52+
self.data2.groupby(self.data2).groups
53+
54+
def time_groupby_groups_object_small(self):
55+
self.data3.groupby(self.data3).groups
56+
57+
def time_groupby_groups_object_large(self):
58+
self.data4.groupby(self.data4).groups
59+
60+
3561
#----------------------------------------------------------------------
3662
# First / last functions
3763

ci/prep_cython_cache.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
ls "$HOME/.cache/"
44

55
PYX_CACHE_DIR="$HOME/.cache/pyxfiles"
6-
pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx"`
7-
pyx_cache_file_list=`find ${PYX_CACHE_DIR} -name "*.pyx"`
6+
pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx" -o -name "*.pxd"`
7+
pyx_cache_file_list=`find ${PYX_CACHE_DIR} -name "*.pyx" -o -name "*.pxd"`
88

99
CACHE_File="$HOME/.cache/cython_files.tar"
1010

ci/requirements-2.7_DOC_BUILD.run

+1
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ sqlalchemy
1818
numexpr
1919
bottleneck
2020
statsmodels
21+
pyqt=4.11.4

ci/submit_cython_cache.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
CACHE_File="$HOME/.cache/cython_files.tar"
44
PYX_CACHE_DIR="$HOME/.cache/pyxfiles"
5-
pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx"`
5+
pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx" -o -name "*.pxd"`
66

77
rm -rf $CACHE_File
88
rm -rf $PYX_CACHE_DIR

0 commit comments

Comments
 (0)