11
11
12
12
Examples:
13
13
./scripts/validate_rst_title_capitalization.py doc/source/development/contributing.rst
14
- ./scripts/validate_rst_title_capitalization.py doc/source/index.rst doc/source/ecosystem.rst
15
14
./scripts/validate_rst_title_capitalization.py doc/source/
16
15
17
16
Files that cannot be validated (the validator currently crashes on them; the cause has not been diagnosed yet):
33
32
from os import walk
34
33
from typing import Generator , List , Tuple
35
34
36
class suppress_stdout_stderr:
    '''
    Context manager that silences stdout and stderr at the descriptor level.

    While the ``with`` block is active, file descriptors 1 and 2 are pointed
    at ``os.devnull``; on exit the original descriptors are put back.
    Exceptions raised inside the block are not suppressed.

    The descriptors are acquired in ``__enter__`` and released in
    ``__exit__``, so an instance that is never entered holds no descriptors
    and the same instance can be used for several consecutive ``with``
    blocks. Nested use of one instance is not supported.

    Code source:
    https://stackoverflow.com/questions/11130156/suppress-stdout-stderr-print-from-python-functions
    '''
    def __init__(self):
        # Nothing is opened here: descriptors are only held while the
        # context is active, so constructing the object cannot leak them.
        self.null_fds = []
        self.save_fds = []

    def __enter__(self):
        # Save the actual stdout (1) and stderr (2) file descriptors.
        self.save_fds = [os.dup(1), os.dup(2)]
        # Open a pair of null files
        self.null_fds = [os.open(os.devnull, os.O_RDWR) for _ in range(2)]
        # Assign the null pointers to stdout and stderr.
        os.dup2(self.null_fds[0], 1)
        os.dup2(self.null_fds[1], 2)
        return self

    def __exit__(self, *_):
        try:
            # Re-assign the real stdout/stderr back to (1) and (2)
            os.dup2(self.save_fds[0], 1)
            os.dup2(self.save_fds[1], 2)
        finally:
            # Close all file descriptors, even if restoring failed
            for fd in self.null_fds + self.save_fds:
                os.close(fd)
            # Forget the closed descriptors so the instance can be reused
            self.null_fds = []
            self.save_fds = []
71
71
72
72
73
73
# Words and names that are exceptions to the capitalization convention
74
74
CAPITALIZATION_EXCEPTIONS = {
75
- 'pandas' , 'Python' , 'IPython' ,'PyTables' , 'Excel' , 'JSON' ,
75
+ 'pandas' , 'Python' , 'IPython' , 'PyTables' , 'Excel' , 'JSON' ,
76
76
'HTML' , 'SAS' , 'SQL' , 'BigQuery' , 'STATA' , 'Interval' , 'PEP8' ,
77
77
'Period' , 'Series' , 'Index' , 'DataFrame' , 'C' , 'Git' , 'GitHub' , 'NumPy' ,
78
78
'Apache' , 'Arrow' , 'Parquet' , 'Triage' , 'MultiIndex' , 'NumFOCUS' , 'sklearn-pandas'
@@ -92,7 +92,8 @@ def __exit__(self, *_):
92
92
cannotValidate = ['doc/source/user_guide/io.rst' , 'doc/source/whatsnew/v0.17.1.rst' ]
93
93
94
94
# Error message prefix; main() prints it followed by the offending heading in quotes
95
- errMessage = "Heading capitalization formatted incorrectly. Please correctly capitalize"
95
+ errMessage = 'Heading capitalization formatted incorrectly. Please correctly capitalize'
96
+
96
97
97
98
def followCapitalizationConvention (title : str ) -> bool :
98
99
'''
@@ -104,7 +105,7 @@ def followCapitalizationConvention(title: str) -> bool:
104
105
'''
105
106
106
107
# split with delimiters comma, semicolon and space, parentheses, colon, slashes
107
- wordList = re .split (r'[;,/():\s]\s*' , title ) # followed by any amount of extra whitespace.
108
+ wordList = re .split (r'[;,/():\s]\s*' , title )
108
109
109
110
# Edge Case: First word is an empty string
110
111
if (len (wordList [0 ]) == 0 ):
@@ -137,6 +138,7 @@ def followCapitalizationConvention(title: str) -> bool:
137
138
# Returning True if the heading follows the capitalization convention
138
139
return True
139
140
141
+
140
142
def findLineNumber(node: 'docutils.nodes.Node') -> int:
    '''
    Find the line number in a document for a particular node

    Starting at ``node``, walks up the ``parent`` chain until a node that
    carries a line number, or the 'document' root, is reached.

    Parameters
    ----------
    node : docutils.nodes.Node
        Node whose line number is wanted

    Returns
    -------
    int
        1 if the walk reaches the 'document' node or runs out of parents;
        otherwise ``line - 1`` of the nearest node (starting with ``node``
        itself) whose ``line`` is set
    '''
    current = node
    # Iterating instead of recursing gives the same answer, and a detached
    # node (parent is None) falls back to line 1 instead of raising.
    while current is not None and current.tagname != 'document':
        if current.line is not None:
            # NOTE(review): the -1 offset is kept from the original code;
            # presumably it maps the reported line onto the heading text
            # row - confirm against docutils' line numbering.
            return current.line - 1
        current = current.parent
    return 1
156
158
159
+
157
160
def parseRST (rstFile : str ) -> docutils .nodes .document :
158
161
'''
159
162
Method to parse through an rstFile and return a document tree
@@ -169,19 +172,21 @@ def parseRST(rstFile: str) -> docutils.nodes.document:
169
172
# Set up default settings for the document tree
170
173
settings = docutils .frontend .OptionParser (
171
174
components = (docutils .parsers .rst .Parser ,)
172
- ).get_default_values ()
175
+ ).get_default_values ()
173
176
174
177
# Initialize an empty document tree with the default settings from above
175
178
document = docutils .utils .new_document ('Document' , settings )
176
179
177
- # Parse the input string into an RST document tree , suppressing any stdout from the parse method
180
+ # Parse input into an RST doctree , suppressing any stdout from parse method
178
181
with suppress_stdout_stderr ():
179
182
parser .parse (input , document )
180
183
181
184
# Return the root node of the document tree
182
185
return document
183
186
184
- def findBadTitlesInDoctree (document : docutils .nodes .document ) -> Generator [List [str ], List [int ], None ]:
187
+
188
+ def findBadTitlesInDoctree (document : docutils .nodes .document ) -> Generator [
189
+ List [str ], List [int ], None ]:
185
190
'''
186
191
Algorithm to identify particular text nodes as headings
187
192
along with the text node's line number.
@@ -192,9 +197,9 @@ def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[
192
197
193
198
However, the problem occurs when we encounter text that has been either
194
199
italicized, bolded, referenced, etc. In these situations, the tagname of
195
- the parent node could be one of the following: 'emphasis', 'strong', 'reference', 'literal',
196
- stored in the 'listOfMarkers' set variable. In this situation, the node's
197
- grandparent would have the 'title' tagname instead.
200
+ the parent node could be one of the following: 'emphasis', 'strong',
201
+ 'reference', and 'literal', stored in the 'listOfMarkers' set variable. In
202
+ this situation, the node's grandparent would have the 'title' tagname instead.
198
203
199
204
Let's see an example that can cause a problem. The heading provided will be
200
205
'Looking at *pandas* docs' versus 'Looking at pandas docs'. In this example,
@@ -203,7 +208,7 @@ def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[
203
208
204
209
'Looking at *pandas* docs' 'Looking at pandas docs'
205
210
title title
206
- / | \ |
211
+ / | | |
207
212
#text emphasis #text VS #text
208
213
| | | |
209
214
'Looking at' #text 'docs' 'Looking at pandas docs'
@@ -225,7 +230,7 @@ def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[
225
230
226
231
'''
227
232
228
- # Initialize an empty string. myText will be used to construct headings and append into titleList
233
+ # myText will be used to construct headings and append into titleList
229
234
myText : str = ""
230
235
231
236
# A docutils.nodes object that stores a listOfMarkers text's grandparent node,
@@ -239,7 +244,7 @@ def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[
239
244
# titleList is the list of headings that is encountered in the doctree
240
245
titleList : List [str ] = []
241
246
242
- # A list of line numbers that the corresponding headings in titleList can be found at
247
+ # List of line numbers that corresponding headings in titleList can be found at
243
248
lineNumberList : List [int ] = []
244
249
245
250
# Traverse through the nodes.Text in the document tree to construct headings
@@ -258,12 +263,12 @@ def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[
258
263
beforeMarker = False
259
264
# Case 2: Encounter a node with parent tagname in listOfMarkers
260
265
elif (node .parent .parent .tagname == 'title' and
261
- node .parent .tagname in listOfMarkers ):
266
+ node .parent .tagname in listOfMarkers ):
262
267
lineno = findLineNumber (node )
263
268
myText = myText + node .astext ()
264
269
beforeMarker = True
265
270
markerGrandparent = node .parent .parent
266
- # Case 3: Encounter a node with parent tagname from none of the above (Ex. 'paragraph' tagname )
271
+ # Case 3: Encounter parent tagname of none of the above (Ex. 'paragraph')
267
272
else :
268
273
beforeMarker = False
269
274
if (myText != "" ):
@@ -272,14 +277,15 @@ def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[
272
277
myText = ""
273
278
lineno = 0
274
279
275
- # Sometimes, there is leftover string that hasn't been appended yet due to how the for loop works
280
+ # Leftover string that hasn't been appended yet due to how the for loop works
276
281
if (myText != "" ):
277
282
titleList .append (myText )
278
283
lineNumberList .append (lineno )
279
284
280
285
# Return a list of the headings and a list of their corresponding line numbers
281
286
return titleList , lineNumberList
282
287
288
+
283
289
def fillBadTitleDictionary (rstFile : str ) -> None :
284
290
'''
285
291
Method that prints all of the bad titles
@@ -298,10 +304,10 @@ def fillBadTitleDictionary(rstFile: str) -> None:
298
304
# Parse rstFile with an RST parser
299
305
document = parseRST (rstFile )
300
306
301
- # Produce a list of headings along with their line numbers from the root document node
307
+ # Make a list of headings along with their line numbers from document tree
302
308
titleList , lineNumberList = findBadTitlesInDoctree (document )
303
309
304
- # Append the badTitleDictionary if the capitalization convention for a heading is not followed
310
+ # Append the badTitleDictionary if the capitalization convention not followed
305
311
for i in range (len (titleList )):
306
312
if not followCapitalizationConvention (titleList [i ]):
307
313
if rstFile not in badTitleDictionary :
@@ -319,7 +325,7 @@ def createRSTDirectoryList(source_paths: List[str]) -> List[str]:
319
325
# List of .rst file paths
320
326
f = []
321
327
322
- # Loop through source_paths. If address is a folder , recursively look through the folder for .rst files
328
+ # Loop through source_paths, recursively looking for .rst files
323
329
for directoryAddress in source_paths :
324
330
if not os .path .exists (directoryAddress ):
325
331
raise ValueError (
@@ -336,6 +342,7 @@ def createRSTDirectoryList(source_paths: List[str]) -> List[str]:
336
342
# Return the filled up list of .rst file paths
337
343
return f
338
344
345
+
339
346
def main (source_paths : List [str ], output_format : str ) -> bool :
340
347
'''
341
348
The main method to execute all commands
@@ -357,14 +364,16 @@ def main(source_paths: List[str], output_format: str) -> bool:
357
364
print ()
358
365
for key in badTitleDictionary :
359
366
for titles in badTitleDictionary [key ]:
360
- print (key + ":" + str (titles [1 ]) + ": " + errMessage + " \" " + titles [0 ] + "\" " )
367
+ print (key + ":" + str (titles [1 ]) + ": " + errMessage
368
+ + " \" " + titles [0 ] + "\" "
369
+ )
361
370
362
371
# Exit status of 1
363
372
return True
364
373
365
374
366
375
if __name__ == "__main__" :
367
- parser = argparse .ArgumentParser (description = 'Validate capitalization for document headings ' )
376
+ parser = argparse .ArgumentParser (description = 'Validate heading capitalization ' )
368
377
369
378
parser .add_argument (
370
379
"paths" , nargs = "+" , default = "." , help = "Source paths of file/directory to check."
0 commit comments