Skip to content

Commit 396088f

Browse files
committed
Merge remote-tracking branch 'upstream/master' into issue9943
2 parents 08eb291 + b669112 commit 396088f

File tree

137 files changed

+6693
-4272
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

137 files changed

+6693
-4272
lines changed

.gitignore

+3-2
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,9 @@ scikits
8888
*.c
8989
*.cpp
9090

91-
# Performance Testing #
92-
#######################
91+
# Unit / Performance Testing #
92+
##############################
93+
.pytest_cache/
9394
asv_bench/env/
9495
asv_bench/html/
9596
asv_bench/results/

README.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -216,13 +216,16 @@ Further, general questions and discussions can also take place on the [pydata ma
216216
## Discussion and Development
217217
Most development discussion takes place on GitHub in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development-related questions.
218218

219-
## Contributing to pandas
219+
## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas)
220+
220221
All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
221222

222223
A detailed overview on how to contribute can be found in the **[contributing guide.](https://pandas.pydata.org/pandas-docs/stable/contributing.html)**
223224

224225
If you are simply looking to start working with the pandas codebase, navigate to the [GitHub “issues” tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [Difficulty Novice](https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22) where you could start out.
225226

227+
You can also triage issues, which may include reproducing bug reports or asking for vital information such as version numbers or reproduction instructions. If you would like to start triaging issues, one easy way to get started is to [subscribe to pandas on CodeTriage](https://www.codetriage.com/pandas-dev/pandas).
228+
226229
Or maybe through using pandas you have an idea of your own or are looking for something in the documentation and thinking ‘this can be improved’...you can do something about it!
227230

228231
Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas).

asv_bench/benchmarks/groupby.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@
1111
from .pandas_vb_common import setup # noqa
1212

1313

14+
method_blacklist = {
15+
'object': {'median', 'prod', 'sem', 'cumsum', 'sum', 'cummin', 'mean',
16+
'max', 'skew', 'cumprod', 'cummax', 'rank', 'pct_change', 'min',
17+
'var', 'mad', 'describe', 'std'}
18+
}
19+
20+
1421
class ApplyDictReturn(object):
1522
goal_time = 0.2
1623

@@ -153,6 +160,7 @@ def time_frame_nth_any(self, df):
153160
def time_frame_nth(self, df):
154161
df.groupby(0).nth(0)
155162

163+
156164
def time_series_nth_any(self, df):
157165
df[1].groupby(df[0]).nth(0, dropna='any')
158166

@@ -369,23 +377,27 @@ class GroupByMethods(object):
369377
goal_time = 0.2
370378

371379
param_names = ['dtype', 'method']
372-
params = [['int', 'float'],
380+
params = [['int', 'float', 'object'],
373381
['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin',
374382
'cumprod', 'cumsum', 'describe', 'ffill', 'first', 'head',
375383
'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique',
376384
'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew',
377385
'std', 'sum', 'tail', 'unique', 'value_counts', 'var']]
378386

379387
def setup(self, dtype, method):
388+
if method in method_blacklist.get(dtype, {}):
389+
raise NotImplementedError # skip benchmark
380390
ngroups = 1000
381391
size = ngroups * 2
382392
rng = np.arange(ngroups)
383393
values = rng.take(np.random.randint(0, ngroups, size=size))
384394
if dtype == 'int':
385395
key = np.random.randint(0, size, size=size)
386-
else:
396+
elif dtype == 'float':
387397
key = np.concatenate([np.random.random(ngroups) * 0.1,
388398
np.random.random(ngroups) * 10.0])
399+
elif dtype == 'object':
400+
key = ['foo'] * size
389401

390402
df = DataFrame({'values': values, 'key': key})
391403
self.df_groupby_method = getattr(df.groupby('key')['values'], method)

ci/requirements-3.6_DOC.run

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ sphinx
55
nbconvert
66
nbformat
77
notebook
8-
matplotlib
8+
matplotlib=2.1*
99
seaborn
1010
scipy
1111
lxml

doc/make.py

+148-32
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,14 @@
1111
$ python make.py html
1212
$ python make.py latex
1313
"""
14+
import importlib
1415
import sys
1516
import os
1617
import shutil
17-
import subprocess
18+
# import subprocess
1819
import argparse
1920
from contextlib import contextmanager
21+
import webbrowser
2022
import jinja2
2123

2224

@@ -26,28 +28,6 @@
2628
BUILD_DIRS = ['doctrees', 'html', 'latex', 'plots', '_static', '_templates']
2729

2830

29-
def _generate_index(include_api, single_doc=None):
30-
"""Create index.rst file with the specified sections.
31-
32-
Parameters
33-
----------
34-
include_api : bool
35-
Whether API documentation will be built.
36-
single_doc : str or None
37-
If provided, this single documentation page will be generated.
38-
"""
39-
if single_doc is not None:
40-
single_doc = os.path.splitext(os.path.basename(single_doc))[0]
41-
include_api = False
42-
43-
with open(os.path.join(SOURCE_PATH, 'index.rst.template')) as f:
44-
t = jinja2.Template(f.read())
45-
46-
with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as f:
47-
f.write(t.render(include_api=include_api,
48-
single_doc=single_doc))
49-
50-
5131
@contextmanager
5232
def _maybe_exclude_notebooks():
5333
"""Skip building the notebooks if pandoc is not installed.
@@ -58,6 +38,7 @@ def _maybe_exclude_notebooks():
5838
1. nbconvert isn't installed, or
5939
2. nbconvert is installed, but pandoc isn't
6040
"""
41+
# TODO move to exclude_pattern
6142
base = os.path.dirname(__file__)
6243
notebooks = [os.path.join(base, 'source', nb)
6344
for nb in ['style.ipynb']]
@@ -96,8 +77,112 @@ class DocBuilder:
9677
All public methods of this class can be called as parameters of the
9778
script.
9879
"""
99-
def __init__(self, num_jobs=1):
80+
def __init__(self, num_jobs=1, include_api=True, single_doc=None,
81+
verbosity=0):
10082
self.num_jobs = num_jobs
83+
self.include_api = include_api
84+
self.verbosity = verbosity
85+
self.single_doc = None
86+
self.single_doc_type = None
87+
if single_doc is not None:
88+
self._process_single_doc(single_doc)
89+
self.exclude_patterns = self._exclude_patterns
90+
91+
self._generate_index()
92+
if self.single_doc_type == 'docstring':
93+
self._run_os('sphinx-autogen', '-o',
94+
'source/generated_single', 'source/index.rst')
95+
96+
@property
97+
def _exclude_patterns(self):
98+
"""Docs source files that will be excluded from building."""
99+
# TODO move maybe_exclude_notebooks here
100+
if self.single_doc is not None:
101+
rst_files = [f for f in os.listdir(SOURCE_PATH)
102+
if ((f.endswith('.rst') or f.endswith('.ipynb'))
103+
and (f != 'index.rst')
104+
and (f != '{0}.rst'.format(self.single_doc)))]
105+
if self.single_doc_type != 'api':
106+
rst_files += ['generated/*.rst']
107+
elif not self.include_api:
108+
rst_files = ['api.rst', 'generated/*.rst']
109+
else:
110+
rst_files = ['generated_single/*.rst']
111+
112+
exclude_patterns = ','.join(
113+
'{!r}'.format(i) for i in ['**.ipynb_checkpoints'] + rst_files)
114+
115+
return exclude_patterns
116+
117+
def _process_single_doc(self, single_doc):
118+
"""Extract self.single_doc (base name) and self.single_doc_type from
119+
passed single_doc kwarg.
120+
121+
"""
122+
self.include_api = False
123+
124+
if single_doc == 'api.rst' or single_doc == 'api':
125+
self.single_doc_type = 'api'
126+
self.single_doc = 'api'
127+
elif os.path.exists(os.path.join(SOURCE_PATH, single_doc)):
128+
self.single_doc_type = 'rst'
129+
self.single_doc = os.path.splitext(os.path.basename(single_doc))[0]
130+
elif os.path.exists(
131+
os.path.join(SOURCE_PATH, '{}.rst'.format(single_doc))):
132+
self.single_doc_type = 'rst'
133+
self.single_doc = single_doc
134+
elif single_doc is not None:
135+
try:
136+
obj = pandas # noqa: F821
137+
for name in single_doc.split('.'):
138+
obj = getattr(obj, name)
139+
except AttributeError:
140+
raise ValueError('Single document not understood, it should '
141+
'be a file in doc/source/*.rst (e.g. '
142+
'"contributing.rst" or a pandas function or '
143+
'method (e.g. "pandas.DataFrame.head")')
144+
else:
145+
self.single_doc_type = 'docstring'
146+
if single_doc.startswith('pandas.'):
147+
self.single_doc = single_doc[len('pandas.'):]
148+
else:
149+
self.single_doc = single_doc
150+
151+
def _copy_generated_docstring(self):
152+
"""Copy existing generated (from api.rst) docstring page because
153+
this is more correct in certain cases (where a custom autodoc
154+
template is used).
155+
156+
"""
157+
fname = os.path.join(SOURCE_PATH, 'generated',
158+
'pandas.{}.rst'.format(self.single_doc))
159+
temp_dir = os.path.join(SOURCE_PATH, 'generated_single')
160+
161+
try:
162+
os.makedirs(temp_dir)
163+
except OSError:
164+
pass
165+
166+
if os.path.exists(fname):
167+
try:
168+
# copying to make sure sphinx always thinks it is new
169+
# and needs to be re-generated (to pick up source code changes)
170+
shutil.copy(fname, temp_dir)
171+
except: # noqa
172+
pass
173+
174+
def _generate_index(self):
175+
"""Create index.rst file with the specified sections."""
176+
if self.single_doc_type == 'docstring':
177+
self._copy_generated_docstring()
178+
179+
with open(os.path.join(SOURCE_PATH, 'index.rst.template')) as f:
180+
t = jinja2.Template(f.read())
181+
182+
with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as f:
183+
f.write(t.render(include_api=self.include_api,
184+
single_doc=self.single_doc,
185+
single_doc_type=self.single_doc_type))
101186

102187
@staticmethod
103188
def _create_build_structure():
@@ -121,7 +206,10 @@ def _run_os(*args):
121206
--------
122207
>>> DocBuilder()._run_os('python', '--version')
123208
"""
124-
subprocess.check_call(args, stderr=subprocess.STDOUT)
209+
# TODO check_call should be safer, but it fails with
210+
# exclude patterns, needs investigation
211+
# subprocess.check_call(args, stderr=subprocess.STDOUT)
212+
os.system(' '.join(args))
125213

126214
def _sphinx_build(self, kind):
127215
"""Call sphinx to build documentation.
@@ -142,11 +230,23 @@ def _sphinx_build(self, kind):
142230
self._run_os('sphinx-build',
143231
'-j{}'.format(self.num_jobs),
144232
'-b{}'.format(kind),
145-
'-d{}'.format(os.path.join(BUILD_PATH,
146-
'doctrees')),
233+
'-{}'.format(
234+
'v' * self.verbosity) if self.verbosity else '',
235+
'-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')),
236+
'-Dexclude_patterns={}'.format(self.exclude_patterns),
147237
SOURCE_PATH,
148238
os.path.join(BUILD_PATH, kind))
149239

240+
def _open_browser(self):
241+
base_url = os.path.join('file://', DOC_PATH, 'build', 'html')
242+
if self.single_doc_type == 'docstring':
243+
url = os.path.join(
244+
base_url,
245+
'generated_single', 'pandas.{}.html'.format(self.single_doc))
246+
else:
247+
url = os.path.join(base_url, '{}.html'.format(self.single_doc))
248+
webbrowser.open(url, new=2)
249+
150250
def html(self):
151251
"""Build HTML documentation."""
152252
self._create_build_structure()
@@ -156,6 +256,11 @@ def html(self):
156256
if os.path.exists(zip_fname):
157257
os.remove(zip_fname)
158258

259+
if self.single_doc is not None:
260+
self._open_browser()
261+
shutil.rmtree(os.path.join(SOURCE_PATH, 'generated_single'),
262+
ignore_errors=True)
263+
159264
def latex(self, force=False):
160265
"""Build PDF documentation."""
161266
self._create_build_structure()
@@ -222,21 +327,32 @@ def main():
222327
metavar='FILENAME',
223328
type=str,
224329
default=None,
225-
help=('filename of section to compile, '
226-
'e.g. "indexing"'))
330+
help=('filename of section or method name to '
331+
'compile, e.g. "indexing", "DataFrame.join"'))
227332
argparser.add_argument('--python-path',
228333
type=str,
229-
default=os.path.join(DOC_PATH, '..'),
334+
default=os.path.dirname(DOC_PATH),
230335
help='path')
336+
argparser.add_argument('-v', action='count', dest='verbosity', default=0,
337+
help=('increase verbosity (can be repeated), '
338+
'passed to the sphinx build command'))
231339
args = argparser.parse_args()
232340

233341
if args.command not in cmds:
234342
raise ValueError('Unknown command {}. Available options: {}'.format(
235343
args.command, ', '.join(cmds)))
236344

345+
# Below we update both os.environ and sys.path. The former is used by
346+
# external libraries (namely Sphinx) to compile this module and resolve
347+
# the import of `python_path` correctly. The latter is used to resolve
348+
# the import within the module, injecting it into the global namespace.
237349
os.environ['PYTHONPATH'] = args.python_path
238-
_generate_index(not args.no_api, args.single)
239-
getattr(DocBuilder(args.num_jobs), args.command)()
350+
sys.path.append(args.python_path)
351+
globals()['pandas'] = importlib.import_module('pandas')
352+
353+
builder = DocBuilder(args.num_jobs, not args.no_api, args.single,
354+
args.verbosity)
355+
getattr(builder, args.command)()
240356

241357

242358
if __name__ == '__main__':

0 commit comments

Comments
 (0)