forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvalidate_docstrings.py
executable file
·369 lines (309 loc) · 11.8 KB
/
validate_docstrings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
#!/usr/bin/env python
"""
Analyze docstrings to detect errors.
If no argument is provided, it does a quick check of docstrings and returns
a csv with all API functions and results of basic checks.
If a function or method is provided in the form "pandas.function",
"pandas.module.class.method", etc., a list of all errors in the docstring for
the specified function or method is reported.
Usage::
$ ./validate_docstrings.py
$ ./validate_docstrings.py pandas.DataFrame.head
"""
import os
import sys
import csv
import re
import functools
import argparse
import contextlib
import inspect
import importlib
import doctest
import pydoc
try:
from io import StringIO
except ImportError:
from cStringIO import StringIO
import numpy
BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(BASE_PATH))
import pandas
sys.path.insert(1, os.path.join(BASE_PATH, 'doc', 'sphinxext'))
from numpydoc.docscrape import NumpyDocString
def _to_original_callable(obj):
    """
    Unwrap ``obj`` until a plain function or class is reached.

    Bound methods are replaced by their ``__func__``, ``functools.partial``
    objects by their ``func`` and properties by their ``fget``. The
    unwrapping is repeated until a function or a class is found.

    Parameters
    ----------
    obj : object
        Object to unwrap.

    Returns
    -------
    function, class or None
        The function or class that was reached. ``None`` is returned when
        the file name reported by ``inspect.getfile`` has the form
        ``<...>``, or when ``obj`` (or something it wraps) is none of the
        handled kinds.
    """
    current = obj
    while not (inspect.isfunction(current) or inspect.isclass(current)):
        if inspect.ismethod(current):
            current = current.__func__
        elif isinstance(current, functools.partial):
            current = current.func
        elif isinstance(current, property):
            current = current.fget
        else:
            # Nothing left to unwrap and still not a function or class.
            return None

    file_name = inspect.getfile(current)
    is_placeholder = file_name.startswith('<') and file_name.endswith('>')
    return None if is_placeholder else current
def _output_header(title, width=80, char='#'):
    """
    Build a banner made of ``title`` framed by two full-width rules.

    Parameters
    ----------
    title : str
        Text shown in the middle line of the banner.
    width : int, default 80
        Length of the rules drawn above and below the title.
    char : str, default '#'
        Character the rules and the sides of the title line are made of.

    Returns
    -------
    str
        A newline, the rule, the title line, the rule again and two
        trailing newlines.
    """
    rule = char * width
    side = char * ((width - len(title) - 2) // 2)
    # Titles of odd length get one extra space after them; with an even
    # ``width`` this keeps the title line as long as the rules.
    filler = ' ' if len(title) % 2 else ''
    banner = '{side} {title}{adj} {side}'.format(side=side,
                                                 title=title,
                                                 adj=filler)
    return '\n{full_line}\n{title_line}\n{full_line}\n\n'.format(
        full_line=rule, title_line=banner)
class Docstring:
    """
    Docstring of an object together with the checks computed from it.

    Parameters
    ----------
    method_name : str
        Dotted name under which the object is reported
        (e.g. ``pandas.DataFrame.head``).
    method_obj : object
        Object whose docstring is inspected.

    Attributes
    ----------
    raw_doc : str
        Docstring text as returned by ``pydoc.getdoc``.
    doc : NumpyDocString
        ``raw_doc`` parsed by ``NumpyDocString``; sections are read by name
        (``doc['Summary']``, ``doc['Parameters']``, ...).
    """

    def __init__(self, method_name, method_obj):
        self.method_name = method_name
        self.method_obj = method_obj
        self.raw_doc = pydoc.getdoc(method_obj)
        self.doc = NumpyDocString(self.raw_doc)

    def __len__(self):
        """Return the number of characters of the raw docstring."""
        return len(self.raw_doc)

    @property
    def source_file_name(self):
        """
        Source file of the object, relative to ``BASE_PATH``.

        ``None`` (or the falsy value returned by ``inspect.getsourcefile``)
        is returned when no source file is known, including built-in
        objects for which ``inspect`` raises ``TypeError``.
        """
        try:
            fname = inspect.getsourcefile(self.method_obj)
        except TypeError:
            # Built-in modules, classes and functions have no Python source.
            return None
        if fname:
            fname = os.path.relpath(fname, BASE_PATH)
        return fname

    @property
    def source_file_def_line(self):
        """
        Line number at which the source of the object starts.

        ``None`` is returned when the source cannot be retrieved.
        """
        try:
            return inspect.getsourcelines(self.method_obj)[-1]
        except (OSError, TypeError):
            # OSError: source not available; TypeError: built-in object.
            return None

    @property
    def github_url(self):
        """Link to the definition of the object in the pandas repository."""
        url = 'https://github.com/pandas-dev/pandas/blob/master/'
        url += '{}#L{}'.format(self.source_file_name,
                               self.source_file_def_line)
        return url

    @property
    def first_line_blank(self):
        """
        Whether the first line of the docstring contains only whitespace.

        ``None`` is returned when the docstring is empty.
        """
        if self.raw_doc:
            return not bool(self.raw_doc.split('\n')[0].strip())

    @property
    def summary(self):
        """
        Summary section joined into a single line.

        When the parser found a summary of several lines and no extended
        summary, that text is reported by ``extended_summary`` instead and
        an empty string is returned here.
        """
        if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1:
            return ''
        return ' '.join(self.doc['Summary'])

    @property
    def extended_summary(self):
        """
        Extended summary section joined into a single line.

        Falls back to a multi-line summary when the extended summary
        section is empty (see ``summary``).
        """
        if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1:
            return ' '.join(self.doc['Summary'])
        return ' '.join(self.doc['Extended Summary'])

    @property
    def needs_summary(self):
        """Whether the summary or the extended summary is missing."""
        return not (bool(self.summary) and bool(self.extended_summary))

    @property
    def doc_parameters(self):
        """Content of the ``Parameters`` section of the parsed docstring."""
        return self.doc['Parameters']

    @property
    def signature_parameters(self):
        """
        Names of the parameters in the signature of the object.

        A leading ``self`` or ``cls`` is dropped. An empty tuple is
        returned for objects that are neither functions nor classes, and
        for the ``dt``, ``str`` and ``cat`` accessor classes.
        """
        if not (inspect.isfunction(self.method_obj)
                or inspect.isclass(self.method_obj)):
            return tuple()
        if (inspect.isclass(self.method_obj)
                and self.method_name.split('.')[-1] in {'dt', 'str', 'cat'}):
            # accessor classes have a signature, but don't want to show this
            return tuple()
        params = tuple(inspect.signature(self.method_obj).parameters.keys())
        if params and params[0] in ('self', 'cls'):
            return params[1:]
        return params

    @property
    def parameter_mismatches(self):
        """
        Differences between the signature and the documented parameters.

        Returns
        -------
        list of str
            One message for undocumented parameters, one for documented
            parameters missing from the signature and, when both sets
            agree, one for a different ordering. Empty when they match.
        """
        errs = []
        signature_params = self.signature_parameters
        # The first element of every entry is the parameter name. Always
        # build a tuple so it compares cleanly with ``signature_params``.
        doc_params = tuple(param[0] for param in self.doc_parameters)

        missing = set(signature_params) - set(doc_params)
        if missing:
            errs.append('Parameters {!r} not documented'.format(missing))
        extra = set(doc_params) - set(signature_params)
        if extra:
            errs.append('Unknown parameters {!r}'.format(extra))
        if not missing and not extra and signature_params != doc_params:
            errs.append('Wrong parameters order. ' +
                        'Actual: {!r}. '.format(signature_params) +
                        'Documented: {!r}'.format(doc_params))

        return errs

    @property
    def correct_parameters(self):
        """Whether ``parameter_mismatches`` found nothing to report."""
        return not bool(self.parameter_mismatches)

    @property
    def see_also(self):
        """Content of the ``See Also`` section of the parsed docstring."""
        return self.doc['See Also']

    @property
    def examples(self):
        """Content of the ``Examples`` section of the parsed docstring."""
        return self.doc['Examples']

    @property
    def first_line_ends_in_dot(self):
        """
        Whether the first line of the docstring ends with a period.

        ``None`` is returned when the docstring is empty.
        """
        # Work on the raw text: the parsed ``doc`` object is not a string.
        if self.raw_doc:
            return self.raw_doc.split('\n')[0].endswith('.')

    @property
    def deprecated(self):
        """
        Whether the object is considered deprecated.

        True for names starting with ``pandas.Panel`` and for docstrings
        whose summary or extended summary contains a ``.. deprecated::``
        directive.
        """
        # Literal substring test: the dots of the directive must not act
        # as regular expression wildcards.
        directive = '.. deprecated:: '
        return (self.method_name.startswith('pandas.Panel') or
                directive in self.summary or
                directive in self.extended_summary)

    @property
    def mentioned_private_classes(self):
        """List of private class names that appear in the raw docstring."""
        private_classes = ['NDFrame', 'IndexOpsMixin']
        return [klass for klass in private_classes if klass in self.raw_doc]

    @property
    def examples_errors(self):
        """
        Run the docstring examples as doctests and collect the output.

        The examples are executed with ``np`` and ``pd`` available as
        globals.

        Returns
        -------
        str
            Everything the doctest runner printed; empty when it printed
            nothing.
        """
        flags = doctest.NORMALIZE_WHITESPACE | doctest.IGNORE_EXCEPTION_DETAIL
        finder = doctest.DocTestFinder()
        runner = doctest.DocTestRunner(optionflags=flags)
        context = {'np': numpy, 'pd': pandas}
        reports = []
        for test in finder.find(self.raw_doc, self.method_name, globs=context):
            captured = StringIO()
            # The runner reports through stdout; capture it per test.
            with contextlib.redirect_stdout(captured):
                runner.run(test)
            reports.append(captured.getvalue())
        return ''.join(reports)
def get_api_items():
    """
    Iterate over the objects listed in ``doc/source/api.rst``.

    The file is read line by line. A ``.. currentmodule::`` line sets the
    module used for the items that follow. After a ``.. autosummary::``
    line, everything up to the first blank line is skipped, and every
    following line up to the next blank line is taken as an item name.

    Yields
    ------
    name : str
        Current module and item name joined with a dot.
    obj : object
        Object obtained by importing the current module and resolving the
        dotted item name with ``getattr``.
    """
    api_fname = os.path.join(BASE_PATH, 'doc', 'source', 'api.rst')

    state = None
    with open(api_fname) as api_file:
        for raw_line in api_file:
            if raw_line.startswith('.. currentmodule::'):
                current_module = raw_line.replace('.. currentmodule::',
                                                  '').strip()
                continue

            if raw_line == '.. autosummary::\n':
                state = 'autosummary'
                continue

            is_blank = raw_line == '\n'

            if state == 'autosummary':
                # Lines of the directive itself are ignored; the blank
                # line that ends them opens the list of items.
                if is_blank:
                    state = 'items'
                continue

            if state != 'items':
                continue

            if is_blank:
                # A blank line closes the list of items.
                state = None
                continue

            item = raw_line.strip()
            target = functools.reduce(getattr,
                                      item.split('.'),
                                      importlib.import_module(current_module))
            yield '.'.join([current_module, item]), target
def validate_all():
    """
    Write a CSV report about every API item to standard output.

    One row is written per item yielded by ``get_api_items``. Items that
    cannot be mapped to a function or class only get their name and type;
    the remaining columns are left empty. The last column holds the name
    of the previous item found at the same source file and line, if any.

    Returns
    -------
    int
        Always 0.
    """
    columns = ['Function or method',
               'Type',
               'File',
               'Code line',
               'GitHub link',
               'Is deprecated',
               'Has summary',
               'Has extended summary',
               'Parameters ok',
               'Has examples',
               'Shared code with']
    writer = csv.writer(sys.stdout)
    writer.writerow(columns)

    # Maps (file, line) to the name of the latest item defined there.
    latest_at_location = {}
    for func_name, func in get_api_items():
        obj_type = type(func).__name__
        original_callable = _to_original_callable(func)
        if original_callable is None:
            writer.writerow([func_name, obj_type] + [''] * 9)
            continue

        doc = Docstring(func_name, original_callable)
        file_name = doc.source_file_name
        def_line = doc.source_file_def_line
        location = (file_name, def_line)
        shared_code = latest_at_location.get(location, '')
        latest_at_location[location] = func_name
        row = [func_name,
               obj_type,
               file_name,
               def_line,
               doc.github_url,
               int(doc.deprecated),
               int(bool(doc.summary)),
               int(bool(doc.extended_summary)),
               int(doc.correct_parameters),
               int(bool(doc.examples)),
               shared_code]
        writer.writerow(row)

    return 0
def _load_obj(func_name):
    """
    Import the object designated by a dotted name.

    Module prefixes of ``func_name`` are tried from the longest to the
    shortest; the first one that can be imported is used and the remaining
    components are resolved with ``getattr``.

    Parameters
    ----------
    func_name : str
        Dotted name such as ``pandas.DataFrame.head``.

    Returns
    -------
    object
        The object found under ``func_name``.

    Raises
    ------
    ImportError
        If no prefix of ``func_name`` can be imported as a module. Names
        without any dot always fall in this case.
    """
    for maxsplit in range(1, func_name.count('.') + 1):
        # TODO when py3 only replace by: module, *func_parts = ...
        func_name_split = func_name.rsplit('.', maxsplit)
        module = func_name_split[0]
        func_parts = func_name_split[1:]
        try:
            obj = importlib.import_module(module)
        except ImportError:
            # Not a module: retry with a shorter prefix.
            continue
        # Stop at the first success so ``func_parts`` matches ``obj``.
        break
    else:
        raise ImportError('No module can be imported '
                          'from "{}"'.format(func_name))

    for part in func_parts:
        obj = getattr(obj, part)
    return obj


def validate_one(func_name):
    """
    Validate the docstring of one object and report on standard error.

    The docstring is printed, followed by the list of errors found and,
    when the examples fail, by the output of the doctests.

    Parameters
    ----------
    func_name : str
        Dotted name of the object to validate, e.g.
        ``pandas.DataFrame.head``.

    Returns
    -------
    int
        Number of error lines reported (detail lines of the parameters
        section are counted too).

    Raises
    ------
    ImportError
        If no module can be imported from ``func_name``.
    """
    func_obj = _load_obj(func_name)
    doc = Docstring(func_name, func_obj)

    sys.stderr.write(_output_header('Docstring ({})'.format(func_name)))
    sys.stderr.write('{}\n'.format(doc.raw_doc))

    errs = []
    if not doc.summary:
        errs.append('No summary found')
    else:
        if not doc.summary[0].isupper():
            errs.append('Summary does not start with capital')
        if doc.summary[-1] != '.':
            errs.append('Summary does not end with dot')
        # ``endswith`` keeps the check safe if the first token is empty.
        if doc.summary.split(' ')[0].endswith('s'):
            errs.append('Summary must start with infinitive verb, '
                        'not third person (e.g. use "Generate" instead of '
                        '"Generates")')
    if not doc.extended_summary:
        errs.append('No extended summary found')

    param_errs = doc.parameter_mismatches
    if param_errs:
        errs.append('Errors in parameters section')
        for param_err in param_errs:
            errs.append('\t{}'.format(param_err))

    mentioned_errs = doc.mentioned_private_classes
    if mentioned_errs:
        errs.append('Private classes mentioned: {}'.format(mentioned_errs))

    examples_errs = ''
    if not doc.examples:
        errs.append('No examples section found')
    else:
        examples_errs = doc.examples_errors
        if examples_errs:
            errs.append('Examples do not pass tests')

    sys.stderr.write(_output_header('Validation'))
    if errs:
        sys.stderr.write('Errors found:\n')
        for err in errs:
            sys.stderr.write('\t{}\n'.format(err))
    else:
        sys.stderr.write('Docstring for "{}" correct. :)\n'.format(func_name))

    if examples_errs:
        sys.stderr.write(_output_header('Doctests'))
        sys.stderr.write(examples_errs)

    return len(errs)
def main(function):
    """
    Dispatch to the validation of one object or of the whole API.

    Parameters
    ----------
    function : str or None
        Dotted name of the object to validate; ``None`` validates every
        API item.

    Returns
    -------
    int
        Value returned by ``validate_all`` or ``validate_one``.
    """
    return validate_all() if function is None else validate_one(function)
if __name__ == '__main__':
    # Command line entry point: parse the optional object name and use the
    # value returned by ``main`` as the exit status of the process.
    function_help = ('function or method to validate '
                     '(e.g. pandas.DataFrame.head) '
                     'if not provided, all docstrings '
                     'are validated')
    parser = argparse.ArgumentParser(description='validate pandas docstrings')
    parser.add_argument('function',
                        nargs='?',
                        default=None,
                        help=function_help)
    cli_args = parser.parse_args()
    sys.exit(main(cli_args.function))