1
+ #!/usr/bin/env python
2
+
1
3
"""Python script for collecting the titles in the rst files and validating
2
4
if they follow the capitalization convention. Prints the titles that do not
3
5
follow the convention. Particularly used for .rst files in the doc/source folder
4
6
5
7
NOTE: Run from the root directory of pandas repository
6
8
7
9
Example:
8
- python ./scripts/validate_rst_title_capitalization.py doc/source/development/contributing.rst
10
+ ./scripts/validate_rst_title_capitalization.py doc/source/development/contributing.rst
9
11
10
12
Files that cannot be validated: (code crashes when validating for some reason)
11
13
doc/source/user_guide/io.rst
24
26
import os
25
27
from os import walk
26
28
29
class suppress_stdout_stderr(object):
    """
    Context manager that silences stdout and stderr at the file-descriptor
    level.

    Code source:
    https://stackoverflow.com/questions/11130156/suppress-stdout-stderr-print-from-python-functions

    While the ``with`` block is active, file descriptors 1 and 2 are pointed
    at ``os.devnull``, so anything written directly to those descriptors is
    discarded. On exit the original descriptors are restored.

    Exceptions raised inside the block are not suppressed (``__exit__``
    returns ``None``).

    This code is needed to suppress output from the parser method,
    because the parser method prints to stdout when encountering Sphinx
    references, as it cannot parse those at this moment.

    NOTE: Python-level buffers (``sys.stdout`` / ``sys.stderr``) are not
    flushed here, so text still sitting in those buffers is written to
    whichever descriptor is active when the buffer is eventually flushed.

    The descriptors are opened in ``__enter__`` and closed in ``__exit__``,
    so an instance that is created but never entered holds no descriptors,
    and the same instance may be used for several consecutive ``with``
    blocks.
    """

    def __init__(self):
        # Nothing is opened until the context is entered; these lists hold
        # the descriptors only while a ``with`` block is active.
        self.null_fds = []
        self.save_fds = []

    def __enter__(self):
        # Open a pair of null files
        self.null_fds = [os.open(os.devnull, os.O_RDWR) for _ in range(2)]
        # Save the actual stdout (1) and stderr (2) file descriptors.
        self.save_fds = [os.dup(1), os.dup(2)]
        # Point stdout and stderr at the null files.
        os.dup2(self.null_fds[0], 1)
        os.dup2(self.null_fds[1], 2)

    def __exit__(self, *_):
        try:
            # Re-assign the real stdout/stderr back to (1) and (2)
            os.dup2(self.save_fds[0], 1)
            os.dup2(self.save_fds[1], 2)
        finally:
            # Close every descriptor we own even if the restore failed, and
            # forget them so a later ``with`` starts from a clean state.
            for fd in self.null_fds + self.save_fds:
                os.close(fd)
            self.null_fds = []
            self.save_fds = []
64
+
65
+
27
66
# Keynames that would not follow capitalization convention
28
67
CAPITALIZATION_EXCEPTIONS = {
29
68
'pandas' , 'Python' , 'IPython' ,'PyTables' , 'Excel' , 'JSON' ,
35
74
# Dictionary of bad titles that will be printed later
36
75
badTitleDictionary = {}
37
76
77
+ # Set of inline markup tags whose text still counts as part of a title when nested inside one
78
+ listOfMarkers = {'emphasis' , 'strong' , 'reference' , 'literal' }
79
+
38
80
# List of files that, when validated, cause the program to crash
39
81
cannotValidate = ['doc/source/user_guide/io.rst' , 'doc/source/whatsnew/v0.17.1.rst' ]
40
82
41
- # Method returns true or false depending on whether title follows convention
42
- def followCapitalizationConvention (title ):
83
+ # Error Message:
84
+ errMessage = "Title capitalization formatted incorrectly. Manually format correctly"
85
+
86
+
87
+ def followCapitalizationConvention (title : str ) -> bool :
88
+ '''
89
+ Method returns true or false depending on whether a title follows
90
+ the capitalization convention
91
+
92
+ '''
43
93
44
94
# Lowercase representation of keynames
45
95
keyNamesLower = {'pandas' }
@@ -80,8 +130,24 @@ def followCapitalizationConvention(title):
80
130
81
131
return True
82
132
83
- # Method prints all of the bad titles
84
- def printBadTitles (rstFile ):
133
def findLineNumber(node: "docutils.nodes.Node") -> int:
    """
    Find the line number in a document for a particular node.

    Starting at ``node`` and walking up through ``.parent``, the first
    ancestor that is either the document root or carries a ``line`` value
    decides the result.

    Parameters
    ----------
    node
        The node to locate. Only its ``tagname``, ``line`` and ``parent``
        attributes are read.

    Returns
    -------
    int
        ``1`` when the walk reaches a node whose tagname is ``'document'``
        (or runs out of parents without finding line information);
        otherwise the first available ``line`` value minus one.
    """
    current = node
    while current is not None:
        if current.tagname == 'document':
            return 1
        if current.line is not None:
            # NOTE(review): the -1 presumably compensates for docutils
            # recording a title's underline line rather than its text
            # line -- confirm against the parser's behaviour.
            return current.line - 1
        current = current.parent
    # Detached node with no line information anywhere up the chain: fall
    # back to the same value used for the document root.
    return 1
144
+
145
+ def fillBadTitleDictionary (rstFile : str ) -> None :
146
+ '''
147
+ Method that prints all of the bad titles
148
+ Message: [directory of rstFile, line number of bad title, error message]
149
+
150
+ '''
85
151
# Ensure file isn't one that causes the code to crash
86
152
if rstFile in cannotValidate :
87
153
return
@@ -99,15 +165,17 @@ def printBadTitles(rstFile):
99
165
components = (docutils .parsers .rst .Parser ,)
100
166
).get_default_values ()
101
167
document = docutils .utils .new_document ('Document' , settings )
102
- parser .parse (input , document )
103
168
104
- # print list of all the subtitles/headings that we want.
105
- # Note: allParentTagsOfText = {'problematic', 'title', 'emphasis', 'inline', 'strong', 'literal', 'literal_block', 'title_reference', 'reference', 'paragraph'}
106
- listOfMarkers = {'emphasis' , 'strong' , 'reference' , 'literal' }
169
+ with suppress_stdout_stderr ():
170
+ parser .parse (input , document )
171
+
172
+
173
+ # Fill up the titleList with lines that follow the title pattern
107
174
myText = ""
108
175
markerGrandparent = ""
109
176
beforeMarker = False
110
177
titleList = []
178
+ lineNumberList = []
111
179
for node in document .traverse (nodes .Text ):
112
180
if (node .parent .tagname == 'title' ):
113
181
if (beforeMarker and markerGrandparent == node .parent ):
@@ -116,28 +184,42 @@ def printBadTitles(rstFile):
116
184
else :
117
185
if (myText != "" ):
118
186
titleList .append (myText )
187
+ lineNumberList .append (lineno )
188
+ lineno = findLineNumber (node )
119
189
myText = node .astext ()
120
190
beforeMarker = False
121
191
elif (node .parent .parent .tagname == 'title' and
122
192
node .parent .tagname in listOfMarkers ):
193
+ lineno = findLineNumber (node )
123
194
myText = myText + node .astext ()
124
195
beforeMarker = True
125
196
markerGrandparent = node .parent .parent
126
197
else :
127
198
beforeMarker = False
128
199
if (myText != "" ):
129
200
titleList .append (myText )
201
+ lineNumberList .append (lineno )
130
202
myText = ""
203
+ lineno = 0
131
204
132
205
if (myText != "" ):
133
206
titleList .append (myText )
207
+ lineNumberList .append (lineno )
208
+
134
209
135
- for text in titleList :
136
- if not followCapitalizationConvention (text ):
137
- badTitleDictionary [rstFile ].append (text )
210
+ # For each line in the titleList, append the badTitleDictionary if
211
+ # the capitalization convention is not followed
212
+ for i in range (len (titleList )):
213
+ if not followCapitalizationConvention (titleList [i ]):
214
+ badTitleDictionary [rstFile ].append ((titleList [i ], lineNumberList [i ]))
138
215
139
- # Method finds all the bad titles, runs printBadTitles
140
- def findBadTitles (directoryAddress ):
216
+
217
+ def findBadTitles (directoryAddress : str ) -> None :
218
+
219
+ '''
220
+ Method finds all the bad titles, runs fillBadTitleDictionary
221
+
222
+ '''
141
223
f = []
142
224
if (directoryAddress .endswith (".rst" )):
143
225
f .append (directoryAddress )
@@ -148,19 +230,25 @@ def findBadTitles(directoryAddress):
148
230
f .append (os .path .join (dirpath , file ))
149
231
150
232
for filename in f :
151
- printBadTitles (filename )
233
+ fillBadTitleDictionary (filename )
152
234
153
235
# Main Method
154
236
if __name__ == "__main__":
    # Every command-line argument is handed to findBadTitles in turn.
    for target in sys.argv[1:]:
        findBadTitles(target)

    print("BAD TITLES \n \n ")

    # Print badTitleDictionary results: the key first, then each recorded
    # entry on its own line, followed by a blank separator line.
    printed = False
    for key, entries in badTitleDictionary.items():
        if not entries:
            continue
        printed = True
        print(key)
        for entry in entries:
            print(entry)
        print()

    # Exit code of 1 if there were bad titles
    if printed:
        sys.exit(1)
0 commit comments