Skip to content

Commit 50661c3

Browse files
author
awu42
committed
added line number to validation script
1 parent 4c83edb commit 50661c3

File tree

1 file changed

+104
-16
lines changed

1 file changed

+104
-16
lines changed

scripts/validate_rst_title_capitalization.py

100644100755
+104-16
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1+
#!/usr/bin/env python
2+
13
"""Python script for collecting the titles in the rst files and validating
24
if they follow the capitalization convention. Prints the titles that do not
35
follow the convention. Particularly used for .rst files in the doc/source folder
46
57
NOTE: Run from the root directory of the pandas repository
68
79
Example:
8-
python ./scripts/validate_rst_title_capitalization.py doc/source/development/contributing.rst
10+
./scripts/validate_rst_title_capitalization.py doc/source/development/contributing.rst
911
1012
Files that cannot be validated: (code crashes when validating for some reason)
1113
doc/source/user_guide/io.rst
@@ -24,6 +26,43 @@
2426
import os
2527
from os import walk
2628

29+
class suppress_stdout_stderr(object):
    '''
    Context manager for a "deep suppression" of stdout and stderr.

    Code adapted from:
    https://stackoverflow.com/questions/11130156/suppress-stdout-stderr-print-from-python-functions

    File descriptors 1 and 2 are pointed at os.devnull for the duration of
    the ``with`` block, so output is suppressed even when it is written by
    code that bypasses ``sys.stdout``/``sys.stderr`` and writes straight to
    the descriptors. Exceptions raised inside the block are not swallowed.

    This is needed around the docutils parser call, which prints to stdout
    when it encounters Sphinx references it cannot parse.

    The descriptors are acquired in ``__enter__`` (not ``__init__``) so that
    an instance that is never entered leaks nothing and the same instance
    can be used for several ``with`` blocks in a row.
    '''

    def __init__(self):
        # Populated by __enter__, emptied again by __exit__.
        self.null_fds = []
        self.save_fds = []

    @staticmethod
    def _flush_python_streams():
        '''Best-effort flush of the Python-level stdout/stderr buffers.'''
        import sys

        for stream in (sys.stdout, sys.stderr):
            if stream is None:
                continue
            try:
                stream.flush()
            except (OSError, ValueError):
                # A closed or broken stream must not prevent suppression.
                pass

    def __enter__(self):
        # Push out anything buffered *before* the block so it is not lost
        # to the null device once the descriptors are swapped.
        self._flush_python_streams()
        # Open a pair of null files.
        self.null_fds = [os.open(os.devnull, os.O_RDWR) for _ in range(2)]
        # Save the actual stdout (1) and stderr (2) file descriptors.
        self.save_fds = [os.dup(1), os.dup(2)]
        # Point stdout and stderr at the null files.
        os.dup2(self.null_fds[0], 1)
        os.dup2(self.null_fds[1], 2)
        return self

    def __exit__(self, *_):
        # Flush while 1/2 still point at the null device; otherwise text
        # buffered inside the block would show up after the restore.
        self._flush_python_streams()
        try:
            # Re-assign the real stdout/stderr back to (1) and (2).
            os.dup2(self.save_fds[0], 1)
            os.dup2(self.save_fds[1], 2)
        finally:
            # Close all helper descriptors even if the restore failed.
            for fd in self.null_fds + self.save_fds:
                os.close(fd)
            self.null_fds = []
            self.save_fds = []
64+
65+
2766
# Keynames that would not follow capitalization convention
2867
CAPITALIZATION_EXCEPTIONS = {
2968
'pandas', 'Python', 'IPython','PyTables', 'Excel', 'JSON',
@@ -35,11 +74,22 @@
3574
# Dictionary of bad titles that will be printed later
3675
badTitleDictionary = {}
3776

77+
# Set of inline markup tags whose text is still treated as part of the enclosing title
78+
listOfMarkers = {'emphasis', 'strong', 'reference', 'literal'}
79+
3880
# List of files that, when validated, cause the program to crash
3981
cannotValidate = ['doc/source/user_guide/io.rst', 'doc/source/whatsnew/v0.17.1.rst']
4082

41-
# Method returns true or false depending on whether title follows convention
42-
def followCapitalizationConvention(title):
83+
# Error Message:
84+
errMessage = "Title capitalization formatted incorrectly. Manually format correctly"
85+
86+
87+
def followCapitalizationConvention(title: str) -> bool:
88+
'''
89+
Method returns true or false depending on whether a title follows
90+
the capitalization convention
91+
92+
'''
4393

4494
# Lowercase representation of keynames
4595
keyNamesLower = {'pandas'}
@@ -80,8 +130,24 @@ def followCapitalizationConvention(title):
80130

81131
return True
82132

83-
# Method prints all of the bad titles
84-
def printBadTitles(rstFile):
133+
def findLineNumber(node: "docutils.nodes.Node") -> int:
    '''
    Method that finds the line number in a document for a particular node.

    Walks from ``node`` up through its ancestors and returns ``line - 1``
    for the first node that has a line number. Returns 1 if the document
    root is reached first, or if the parent chain ends without reaching a
    document (a detached node).
    '''
    current = node
    while current is not None:
        # The document root stands for the top of the file.
        if current.tagname == 'document':
            return 1
        # First ancestor (or the node itself) that knows its line wins.
        if current.line is not None:
            return current.line - 1
        current = current.parent
    # Detached node: nothing in the chain had a line number.
    return 1
144+
145+
def fillBadTitleDictionary(rstFile: str) -> None:
146+
'''
147+
Method that fills badTitleDictionary with all of the bad titles found in rstFile
148+
Each entry stored under rstFile: (bad title text, line number of bad title)
149+
150+
'''
85151
# Ensure file isn't one that causes the code to crash
86152
if rstFile in cannotValidate:
87153
return
@@ -99,15 +165,17 @@ def printBadTitles(rstFile):
99165
components=(docutils.parsers.rst.Parser,)
100166
).get_default_values()
101167
document = docutils.utils.new_document('Document', settings)
102-
parser.parse(input, document)
103168

104-
# print list of all the subtitles/headings that we want.
105-
# Note: allParentTagsOfText = {'problematic', 'title', 'emphasis', 'inline', 'strong', 'literal', 'literal_block', 'title_reference', 'reference', 'paragraph'}
106-
listOfMarkers = {'emphasis', 'strong', 'reference', 'literal'}
169+
with suppress_stdout_stderr():
170+
parser.parse(input, document)
171+
172+
173+
# Fill up the titleList with lines that follow the title pattern
107174
myText = ""
108175
markerGrandparent = ""
109176
beforeMarker = False
110177
titleList = []
178+
lineNumberList = []
111179
for node in document.traverse(nodes.Text):
112180
if (node.parent.tagname == 'title'):
113181
if (beforeMarker and markerGrandparent == node.parent):
@@ -116,28 +184,42 @@ def printBadTitles(rstFile):
116184
else:
117185
if (myText != ""):
118186
titleList.append(myText)
187+
lineNumberList.append(lineno)
188+
lineno = findLineNumber(node)
119189
myText = node.astext()
120190
beforeMarker = False
121191
elif (node.parent.parent.tagname == 'title' and
122192
node.parent.tagname in listOfMarkers):
193+
lineno = findLineNumber(node)
123194
myText = myText + node.astext()
124195
beforeMarker = True
125196
markerGrandparent = node.parent.parent
126197
else:
127198
beforeMarker = False
128199
if (myText != ""):
129200
titleList.append(myText)
201+
lineNumberList.append(lineno)
130202
myText = ""
203+
lineno = 0
131204

132205
if (myText != ""):
133206
titleList.append(myText)
207+
lineNumberList.append(lineno)
208+
134209

135-
for text in titleList:
136-
if not followCapitalizationConvention(text):
137-
badTitleDictionary[rstFile].append(text)
210+
# For each line in the titleList, append the badTitleDictionary if
211+
# the capitalization convention is not followed
212+
for i in range(len(titleList)):
213+
if not followCapitalizationConvention(titleList[i]):
214+
badTitleDictionary[rstFile].append((titleList[i], lineNumberList[i]))
138215

139-
# Method finds all the bad titles, runs printBadTitles
140-
def findBadTitles(directoryAddress):
216+
217+
def findBadTitles(directoryAddress: str) -> None:
218+
219+
'''
220+
Method finds all the bad titles, runs fillBadTitleDictionary
221+
222+
'''
141223
f = []
142224
if (directoryAddress.endswith(".rst")):
143225
f.append(directoryAddress)
@@ -148,19 +230,25 @@ def findBadTitles(directoryAddress):
148230
f.append(os.path.join(dirpath, file))
149231

150232
for filename in f:
151-
printBadTitles(filename)
233+
fillBadTitleDictionary(filename)
152234

153235
# Main Method
154236
if __name__ == "__main__":
155237
for i in range(1, len(sys.argv)):
156238
findBadTitles(sys.argv[i])
157239

158-
print("\n \nBAD TITLES \n \n")
240+
print("BAD TITLES \n \n")
159241

160242
# Print badTitleDictionary Results
243+
printed = False
161244
for key in badTitleDictionary:
162245
if (len(badTitleDictionary[key]) != 0):
246+
printed = True
163247
print(key)
164248
for titles in badTitleDictionary[key]:
165249
print(titles)
166250
print()
251+
252+
# Exit code of 1 if there were bad titles
253+
if (printed):
254+
sys.exit(1)

0 commit comments

Comments
 (0)