Skip to content

Commit 2d3cfe7

Browse files
author
awu42
committed
argument parser correctly implemented
1 parent f513f29 commit 2d3cfe7

File tree

1 file changed

+107
-58
lines changed

1 file changed

+107
-58
lines changed

scripts/validate_rst_title_capitalization.py

+107-58
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
#!/usr/bin/env python
22

3-
"""Python script for collecting the titles in the rst files and validating
3+
"""
4+
Author: tonywu1999, Date Edited: 01/17/2020
5+
6+
Python script for collecting the titles in the rst files and validating
47
if they follow the capitalization convention. Prints the titles that do not
58
follow the convention. Particularly used for .rst files in the doc/source folder
69
@@ -18,13 +21,15 @@
1821
1922
"""
2023

24+
import argparse
2125
import sys
2226
from docutils.parsers.rst import Parser
2327
import docutils
2428
from docutils import nodes
2529
import re
2630
import os
2731
from os import walk
32+
from typing import Generator, List, Tuple
2833

2934
class suppress_stdout_stderr(object):
3035
'''
@@ -71,7 +76,11 @@ def __exit__(self, *_):
7176
'Apache', 'Arrow', 'Parquet', 'Triage', 'MultiIndex', 'NumFOCUS'
7277
}
7378

74-
# Dictionary of bad titles that will be printed later
79+
# Lowercase representation of CAPITALIZATION_EXCEPTIONS
80+
CAPITALIZATION_EXCEPTIONS_LOWER = {word.lower() for word in CAPITALIZATION_EXCEPTIONS}
81+
82+
# Dictionary of bad titles that will be printed later along with line numbers
83+
# Key: path of the .rst file, Value: list of (bad title, line number) tuples
7584
badTitleDictionary = {}
7685

7786
# List of problematic tags that are exceptions to parent rule
@@ -83,31 +92,26 @@ def __exit__(self, *_):
8392
# Error Message:
8493
errMessage = "Title capitalization formatted incorrectly. Manually format correctly"
8594

86-
8795
def followCapitalizationConvention(title: str) -> bool:
8896
'''
89-
Method returns true or false depending on whether a title follows
90-
the capitalization convention
97+
tonywu1999's algorithm to determine if a heading follows the capitalization convention
9198
92-
'''
99+
This method returns true if the title follows the convention
100+
and false if it does not
93101
94-
# Lowercase representation of keynames
95-
keyNamesLower = {'pandas'}
96-
for k in CAPITALIZATION_EXCEPTIONS:
97-
keyNamesLower.add(k.lower())
102+
'''
98103

99104
# split with delimiters comma, semicolon and space, parentheses, colon
100105
wordList = re.split(r'[;,():\s]\s*', title) # followed by any amount of extra whitespace.
101106

102-
103107
# Edge Case: First word is an empty string
104108
if (len(wordList[0]) == 0):
105109
return False
106110

107111
# Dealing with the first word of the title
108112
if wordList[0] not in CAPITALIZATION_EXCEPTIONS:
109113
# word is not in keyNames but has different capitalization
110-
if wordList[0].lower() in keyNamesLower:
114+
if wordList[0].lower() in CAPITALIZATION_EXCEPTIONS_LOWER:
111115
return False
112116
# First letter of first word must be uppercase
113117
if (not wordList[0][0].isupper()):
@@ -121,7 +125,7 @@ def followCapitalizationConvention(title: str) -> bool:
121125
for i in range(1, len(wordList)):
122126
if wordList[i] not in CAPITALIZATION_EXCEPTIONS:
123127
# word is not in keyNames but has different capitalization
124-
if wordList[i].lower() in keyNamesLower:
128+
if wordList[i].lower() in CAPITALIZATION_EXCEPTIONS_LOWER:
125129
return False
126130
# Remaining letters must not be uppercase
127131
for j in range(len(wordList[i])):
@@ -132,7 +136,8 @@ def followCapitalizationConvention(title: str) -> bool:
132136

133137
def findLineNumber(node: docutils.nodes) -> int:
134138
'''
135-
Method that finds the line number in a document for a particular node
139+
Recursive method that finds the line number in a document for a particular node
140+
in the doctree
136141
137142
'''
138143
if (node.tagname == 'document'):
@@ -142,21 +147,11 @@ def findLineNumber(node: docutils.nodes) -> int:
142147
else:
143148
return node.line - 1
144149

145-
def fillBadTitleDictionary(rstFile: str) -> None:
150+
def parseRST(rstFile: str) -> docutils.nodes.document:
146151
'''
147-
Method that prints all of the bad titles
148-
Message: [directory of rstFile, line number of bad title, error message]
152+
Method to parse through an rstFile and return a document tree
149153
150154
'''
151-
# Ensure file isn't one that causes the code to crash
152-
if rstFile in cannotValidate:
153-
return
154-
# Initialize this file's badtitleDictionary slot
155-
if rstFile in badTitleDictionary:
156-
return
157-
else:
158-
badTitleDictionary[rstFile] = []
159-
160155
# Parse through rstFile
161156
parser = docutils.parsers.rst.Parser()
162157
f = open(rstFile, "r")
@@ -169,8 +164,14 @@ def fillBadTitleDictionary(rstFile: str) -> None:
169164
with suppress_stdout_stderr():
170165
parser.parse(input, document)
171166

167+
return document
168+
169+
def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[str], List[int], None]:
170+
'''
171+
tonywu1999's algorithm to identify particular text nodes as headings
172+
along with the text node's line number
172173
173-
# Fill up the titleList with lines that follow the title pattern
174+
'''
174175
myText = ""
175176
markerGrandparent = ""
176177
beforeMarker = False
@@ -206,49 +207,97 @@ def fillBadTitleDictionary(rstFile: str) -> None:
206207
titleList.append(myText)
207208
lineNumberList.append(lineno)
208209

210+
return titleList, lineNumberList
211+
212+
def fillBadTitleDictionary(rstFile: str) -> None:
    '''
    Record every incorrectly capitalized heading of one rst file.

    The file is parsed with parseRST, its headings and their line numbers
    are collected with findBadTitlesInDoctree, and a (title, line number)
    tuple is appended to badTitleDictionary[rstFile] for each heading that
    followCapitalizationConvention rejects.

    Nothing is printed here, and no dictionary entry is created for a file
    that has no bad headings. Files listed in cannotValidate and files that
    already have an entry in badTitleDictionary are skipped.

    '''

    # Ensure file isn't one that causes the code to crash
    if rstFile in cannotValidate:
        return

    # Ensure this file doesn't already have a badTitleDictionary slot
    if rstFile in badTitleDictionary:
        return

    # Parse rstFile with an RST parser
    document = parseRST(rstFile)

    # Headings and their line numbers come back as two parallel lists
    titleList, lineNumberList = findBadTitlesInDoctree(document)

    for title, lineNumber in zip(titleList, lineNumberList):
        if not followCapitalizationConvention(title):
            # setdefault creates the slot lazily, so only files with at
            # least one bad heading ever appear in the dictionary
            badTitleDictionary.setdefault(rstFile, []).append((title, lineNumber))
216240

217-
def findBadTitles(directoryAddress: str) -> None:
218241

242+
def createRSTDirectoryList(source_paths: List[str]) -> List[str]:
    '''
    Expand the command line paths into a list of .rst file paths.

    A path ending in ".rst" is taken to be a single file and is kept exactly
    as given (its existence is not checked), unless it is a directory.
    Every other path is walked recursively and each file whose name ends in
    ".rst" is collected as os.path.join(dirpath, filename).

    Returns the collected paths, grouped in the order of source_paths.
    Duplicates are not removed.

    '''
    rstFiles = []
    for sourcePath in source_paths:
        # A directory can itself be named "something.rst"; only treat the
        # path as a single file when it is not a directory
        if sourcePath.endswith(".rst") and not os.path.isdir(sourcePath):
            rstFiles.append(sourcePath)
            continue

        for dirpath, _, filenames in os.walk(sourcePath):
            rstFiles.extend(
                os.path.join(dirpath, filename)
                for filename in filenames
                if filename.endswith(".rst")
            )

    return rstFiles
234259

235-
# Main Method
236-
if __name__ == "__main__":
237-
for i in range(1, len(sys.argv)):
238-
findBadTitles(sys.argv[i])
260+
def main(source_paths: List[str], output_format: str) -> bool:
    '''
    Validate the headings of every rst file reachable from source_paths.

    Each incorrectly capitalized heading is printed on its own line using
    output_format, a str.format template that may reference the fields
    source_path, line_number, heading and msg.

    Returns True when at least one bad heading was recorded and False
    otherwise. The result is meant to be passed to sys.exit, where False
    becomes exit status 0 and True becomes exit status 1.

    '''

    # Fill the badTitleDictionary, which contains all incorrectly capitalized headings
    for filename in createRSTDirectoryList(source_paths):
        fillBadTitleDictionary(filename)

    # Nothing recorded means every heading followed the convention
    if not badTitleDictionary:
        return False

    # Report every bad heading in the format requested on the command line
    for sourcePath, badTitles in badTitleDictionary.items():
        for heading, lineNumber in badTitles:
            print(
                output_format.format(
                    source_path=sourcePath,
                    line_number=lineNumber,
                    heading=heading,
                    msg=errMessage,
                )
            )

    return True
286+
287+
288+
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Validate capitalization for document headings'
    )

    # nargs="*" (rather than "+") lets the default apply when no path is
    # given; with "+" argparse requires at least one path, so a default
    # would never be used
    parser.add_argument(
        "paths",
        nargs="*",
        default=["."],
        help="Source paths of file/directory to check.",
    )

    parser.add_argument(
        "--format",
        default="{source_path}:{line_number}:{heading}:{msg}",
        help="Output format of incorrectly capitalized titles",
    )

    args = parser.parse_args()

    # main returns a bool; sys.exit maps False to status 0 and True to 1
    sys.exit(main(args.paths, args.format))

0 commit comments

Comments
 (0)