1
1
#!/usr/bin/env python
2
2
3
- """Python script for collecting the titles in the rst files and validating
3
+ """
4
+ Author: tonywu1999, Date Edited: 01/17/2020
5
+
6
+ Python script for collecting the titles in the rst files and validating
4
7
if they follow the capitalization convention. Prints the titles that do not
5
8
follow the convention. Particularly used for .rst files in the doc/source folder
6
9
18
21
19
22
"""
20
23
24
+ import argparse
21
25
import sys
22
26
from docutils .parsers .rst import Parser
23
27
import docutils
24
28
from docutils import nodes
25
29
import re
26
30
import os
27
31
from os import walk
32
+ from typing import Generator , List , Tuple
28
33
29
34
class suppress_stdout_stderr (object ):
30
35
'''
@@ -71,7 +76,11 @@ def __exit__(self, *_):
71
76
'Apache' , 'Arrow' , 'Parquet' , 'Triage' , 'MultiIndex' , 'NumFOCUS'
72
77
}
73
78
74
- # Dictionary of bad titles that will be printed later
79
+ # Lowercase representation of CAPITALIZATION_EXCEPTIONS
80
+ CAPITALIZATION_EXCEPTIONS_LOWER = {word .lower () for word in CAPITALIZATION_EXCEPTIONS }
81
+
82
+ # Dictionary of bad titles that will be printed later along with line numbers
83
+ # Key: path of the .rst file, Value: list of (bad title, line number) tuples
75
84
badTitleDictionary = {}
76
85
77
86
# List of problematic tags that are exceptions to parent rule
@@ -83,31 +92,26 @@ def __exit__(self, *_):
83
92
# Error Message:
84
93
errMessage = "Title capitalization formatted incorrectly. Manually format correctly"
85
94
86
-
87
95
def followCapitalizationConvention (title : str ) -> bool :
88
96
'''
89
- Method returns true or false depending on whether a title follows
90
- the capitalization convention
97
+ tonywu1999's algorithm to determine if a heading follows the capitalization convention
91
98
92
- '''
99
+ This method returns true if the title follows the convention
100
+ and false if it does not
93
101
94
- # Lowercase representation of keynames
95
- keyNamesLower = {'pandas' }
96
- for k in CAPITALIZATION_EXCEPTIONS :
97
- keyNamesLower .add (k .lower ())
102
+ '''
98
103
99
104
# split with delimiters comma, semicolon and space, parentheses, colon
100
105
wordList = re .split (r'[;,():\s]\s*' , title ) # followed by any amount of extra whitespace.
101
106
102
-
103
107
# Edge Case: First word is an empty string
104
108
if (len (wordList [0 ]) == 0 ):
105
109
return False
106
110
107
111
# Dealing with the first word of the title
108
112
if wordList [0 ] not in CAPITALIZATION_EXCEPTIONS :
109
113
# word is not in keyNames but has different capitalization
110
- if wordList [0 ].lower () in keyNamesLower :
114
+ if wordList [0 ].lower () in CAPITALIZATION_EXCEPTIONS_LOWER :
111
115
return False
112
116
# First letter of first word must be uppercase
113
117
if (not wordList [0 ][0 ].isupper ()):
@@ -121,7 +125,7 @@ def followCapitalizationConvention(title: str) -> bool:
121
125
for i in range (1 , len (wordList )):
122
126
if wordList [i ] not in CAPITALIZATION_EXCEPTIONS :
123
127
# word is not in keyNames but has different capitalization
124
- if wordList [i ].lower () in keyNamesLower :
128
+ if wordList [i ].lower () in CAPITALIZATION_EXCEPTIONS_LOWER :
125
129
return False
126
130
# Remaining letters must not be uppercase
127
131
for j in range (len (wordList [i ])):
@@ -132,7 +136,8 @@ def followCapitalizationConvention(title: str) -> bool:
132
136
133
137
def findLineNumber (node : docutils .nodes ) -> int :
134
138
'''
135
- Method that finds the line number in a document for a particular node
139
+ Recursive method that finds the line number in a document for a particular node
140
+ in the doctree
136
141
137
142
'''
138
143
if (node .tagname == 'document' ):
@@ -142,21 +147,11 @@ def findLineNumber(node: docutils.nodes) -> int:
142
147
else :
143
148
return node .line - 1
144
149
145
- def fillBadTitleDictionary (rstFile : str ) -> None :
150
+ def parseRST (rstFile : str ) -> docutils . nodes . document :
146
151
'''
147
- Method that prints all of the bad titles
148
- Message: [directory of rstFile, line number of bad title, error message]
152
+ Method to parse through an rstFile and return a document tree
149
153
150
154
'''
151
- # Ensure file isn't one that causes the code to crash
152
- if rstFile in cannotValidate :
153
- return
154
- # Initialize this file's badtitleDictionary slot
155
- if rstFile in badTitleDictionary :
156
- return
157
- else :
158
- badTitleDictionary [rstFile ] = []
159
-
160
155
# Parse through rstFile
161
156
parser = docutils .parsers .rst .Parser ()
162
157
f = open (rstFile , "r" )
@@ -169,8 +164,14 @@ def fillBadTitleDictionary(rstFile: str) -> None:
169
164
with suppress_stdout_stderr ():
170
165
parser .parse (input , document )
171
166
167
+ return document
168
+
169
+ def findBadTitlesInDoctree (document : docutils .nodes .document ) -> Generator [List [str ], List [int ], None ]:
170
+ '''
171
+ tonywu1999's algorithm to identify particular text nodes as headings
172
+ along with the text node's line number
172
173
173
- # Fill up the titleList with lines that follow the title pattern
174
+ '''
174
175
myText = ""
175
176
markerGrandparent = ""
176
177
beforeMarker = False
@@ -206,49 +207,97 @@ def fillBadTitleDictionary(rstFile: str) -> None:
206
207
titleList .append (myText )
207
208
lineNumberList .append (lineno )
208
209
210
+ return titleList , lineNumberList
211
+
212
def fillBadTitleDictionary(rstFile: str) -> None:
    '''
    Record every incorrectly capitalized heading of one rst file.

    The file is parsed with parseRST, its headings and their line numbers
    are collected with findBadTitlesInDoctree, and every heading rejected by
    followCapitalizationConvention is stored in badTitleDictionary[rstFile]
    as a (heading, line number) tuple. Nothing is printed here.

    Files listed in cannotValidate and files that already have an entry in
    badTitleDictionary are skipped. A file without bad headings gets no
    entry at all.

    Parameters
    ----------
    rstFile : str
        Path of the rst file to check.

    '''
    # Ensure file isn't one that causes the code to crash
    if rstFile in cannotValidate:
        return

    # Ensure this file doesn't already have a badTitleDictionary slot
    if rstFile in badTitleDictionary:
        return

    # Parse rstFile with an RST parser
    document = parseRST(rstFile)

    # Headings and their line numbers, taken from the root document node
    titleList, lineNumberList = findBadTitlesInDoctree(document)

    # Keep only the headings that break the capitalization convention; the
    # slot for this file is created lazily on the first bad heading
    for title, lineNumber in zip(titleList, lineNumberList):
        if not followCapitalizationConvention(title):
            badTitleDictionary.setdefault(rstFile, []).append((title, lineNumber))
216
240
217
- def findBadTitles (directoryAddress : str ) -> None :
218
241
242
def createRSTDirectoryList(source_paths: List[str]) -> List[str]:
    '''
    Collect the paths of all .rst files reachable from the given paths.

    A path ending in ".rst" is taken as a file and added as-is (it is not
    checked for existence). Any other path is walked recursively as a
    directory and every file whose name ends in ".rst" is added, joined
    with the directory it was found in.

    Directories and files are visited in sorted order so that the result
    does not depend on the order in which the file system lists entries.

    Parameters
    ----------
    source_paths : List[str]
        File and/or directory paths to search.

    Returns
    -------
    List[str]
        Paths of the .rst files, grouped in the order of source_paths.

    '''
    rstFiles = []
    for sourcePath in source_paths:
        if sourcePath.endswith(".rst"):
            rstFiles.append(sourcePath)
            continue

        for dirpath, dirnames, filenames in walk(sourcePath):
            # Sorting in place steers the top-down walk deterministically
            dirnames.sort()
            rstFiles.extend(
                os.path.join(dirpath, name)
                for name in sorted(filenames)
                if name.endswith(".rst")
            )

    return rstFiles
234
259
235
- # Main Method
236
- if __name__ == "__main__" :
237
- for i in range (1 , len (sys .argv )):
238
- findBadTitles (sys .argv [i ])
260
def main(source_paths: List[str], output_format: str) -> bool:
    '''
    Check every rst file under source_paths and report the bad headings.

    Parameters
    ----------
    source_paths : List[str]
        File and/or directory paths to check.
    output_format : str
        str.format template used for each reported heading. The fields
        passed to it are source_path, line_number, heading and msg.

    Returns
    -------
    bool
        False when no bad heading was found, True otherwise. The caller
        passes this value to sys.exit, so it doubles as the exit status
        (0 / 1).

    '''
    # Fill the badTitleDictionary, which contains all incorrectly capitalized headings
    for filename in createRSTDirectoryList(source_paths):
        fillBadTitleDictionary(filename)

    # Nothing to report: exit status 0
    if not badTitleDictionary:
        return False

    # Report each bad heading through the caller-supplied template
    for source_path, badTitles in badTitleDictionary.items():
        for heading, line_number in badTitles:
            print(
                output_format.format(
                    source_path=source_path,
                    line_number=line_number,
                    heading=heading,
                    msg=errMessage,
                )
            )

    # Exit status of 1
    return True
286
+
287
+
288
if __name__ == "__main__":
    # Command line entry point: parse the arguments, run the check and use
    # the result of main() as the process exit status.
    parser = argparse.ArgumentParser(description='Validate capitalization for document headings')

    # nargs="*" rather than "+": with "+" the positional is required, so the
    # default could never apply. The default is a list because main()
    # iterates over the paths.
    parser.add_argument(
        "paths", nargs="*", default=["."], help="Source paths of file/directory to check."
    )

    parser.add_argument(
        "--format",
        default="{source_path}:{line_number}:{heading}:{msg}",
        help="Output format of incorrectly capitalized titles",
    )

    args = parser.parse_args()

    sys.exit(main(args.paths, args.format))
0 commit comments