1
1
#!/usr/bin/env python
2
-
3
2
"""
4
- GH #29641
5
-
6
- Collect the titles in the rst files and validate if they follow the proper
7
- capitalization convention.
3
+ Validate that the titles in the rst files follow the proper capitalization convention.
8
4
9
- Prints the titles that do not follow the convention.
5
+ Print the titles that do not follow the convention.
10
6
11
7
Usage::
12
8
./scripts/validate_rst_title_capitalization.py doc/source/development/contributing.rst
13
9
./scripts/validate_rst_title_capitalization.py doc/source/
14
10
15
11
"""
16
-
17
12
import argparse
18
13
import sys
19
14
import re
20
15
import os
21
- from os import walk
22
- from typing import Generator , List
16
+ from typing import Tuple , Generator , List
23
17
24
18
25
- # Keynames that would not follow capitalization convention
26
19
CAPITALIZATION_EXCEPTIONS = {
27
20
"pandas" ,
28
21
"Python" ,
54
47
"Docker" ,
55
48
}
56
49
57
- # Lowercase representation of CAPITALIZATION_EXCEPTIONS
58
- CAPITALIZATION_EXCEPTIONS_LOWER = {word .lower () for word in CAPITALIZATION_EXCEPTIONS }
50
# Maps the lower-cased spelling of every capitalization exception to its
# canonical spelling, e.g. "python" -> "Python".
CAP_EXCEPTIONS_DICT = {
    exception.lower(): exception for exception in CAPITALIZATION_EXCEPTIONS
}

# Incorrectly capitalized headings collected while scanning.
# Key: path of the rst file, value: list of (bad title, line number) pairs.
bad_title_dict = {}

# Prefix of the message printed for every incorrectly capitalized heading.
err_msg = "Heading capitalization formatted incorrectly. Please correctly capitalize"
66
57
58
def correct_title_capitalization(title: str) -> str:
    """
    Build the correctly capitalized version of a heading.

    The heading is put in sentence case (first character upper case, the
    rest lower case) and every word that has an entry in
    ``CAP_EXCEPTIONS_DICT`` is then restored to its canonical spelling.
    Text inside a ``<http://...>`` / ``<https://...>`` link target is
    returned unchanged: it is not part of the visible heading text and
    changing its case could break the link.

    Parameters
    ----------
    title : str
        Heading string to correct

    Returns
    -------
    correct_title : str
        Correctly capitalized title

    """

    def restore_exception(match):
        # Look the word up by its lower-cased form; words without an entry
        # are returned as they are.
        word = match.group()
        return CAP_EXCEPTIONS_DICT.get(word.lower(), word)

    # The capturing group makes re.split keep the link targets in the
    # result: even indices hold heading text, odd indices hold the
    # "<https://...>" chunks that must not be modified.
    pieces = re.split(r"(<https?:\/\/.*[\r\n]*>)", title, flags=re.IGNORECASE)

    corrected = []
    for index, piece in enumerate(pieces):
        if index % 2:
            corrected.append(piece)
            continue
        # Only the very first character of the heading is upper-cased.
        piece = piece.capitalize() if index == 0 else piece.lower()
        # A callback replacement is used so the exception spelling is
        # inserted literally, never interpreted as a regex template.
        corrected.append(re.sub(r"\w+", restore_exception, piece))

    return "".join(corrected)
87
+
88
+
89
+ def is_following_capitalization_convention (title : str ) -> bool :
90
+ """
91
+ Algorithm to determine if a title is capitalized correctly
74
92
75
93
Parameters
76
94
----------
@@ -80,49 +98,19 @@ def is_following_capitalization_convention(title: str) -> bool:
80
98
Returns
81
99
-------
82
100
bool
83
- True if capitalization is correct , False if not
101
+ True if title capitalized correctly , False if not
84
102
85
103
"""
86
104
87
- # Remove https link if present in heading
88
- title = re .sub (r"<https?:\/\/.*[\r\n]*>" , "" , title )
105
+ correct_title = correct_title_capitalization (title )
89
106
90
- # Split with delimiters comma, semicolon and space, parentheses, colon, slashes
91
- word_list = re .split (r"[;,-/():\s]\s*" , title )
92
-
93
- # Edge Case: First word is an empty string
94
- if len (word_list [0 ]) == 0 :
107
+ if (title != correct_title ):
95
108
return False
109
+ else :
110
+ return True
111
+
96
112
97
- # Dealing with the first word of the title
98
- if word_list [0 ] not in CAPITALIZATION_EXCEPTIONS :
99
- # word is not in CAPITALIZATION_EXCEPTIONS but has different capitalization
100
- if word_list [0 ].lower () in CAPITALIZATION_EXCEPTIONS_LOWER :
101
- return False
102
- # First letter of first word must be uppercase
103
- if not word_list [0 ][0 ].isupper ():
104
- return False
105
- # Remaining letters of first word must not be uppercase
106
- for j in range (1 , len (word_list [0 ])):
107
- if word_list [0 ][j ].isupper ():
108
- return False
109
-
110
- # Remaining letters must not be uppercase letters
111
- for i in range (1 , len (word_list )):
112
- if word_list [i ] not in CAPITALIZATION_EXCEPTIONS :
113
- # word is not in CAPITALIZATION_EXCEPTIONS but has different capitalization
114
- if word_list [i ].lower () in CAPITALIZATION_EXCEPTIONS_LOWER :
115
- return False
116
- # Remaining letters must not be uppercase
117
- for j in range (len (word_list [i ])):
118
- if word_list [i ][j ].isupper ():
119
- return False
120
-
121
- # Returning True if the heading follows the capitalization convention
122
- return True
123
-
124
-
125
- def findTitles (rst_file : str ) -> Generator [List [str ], List [int ], None ]:
113
+ def find_titles (rst_file : str ) -> Generator [Tuple [str , int ], None , None ]:
126
114
"""
127
115
Algorithm to identify particular text that should be considered headings in an
128
116
RST file
@@ -135,27 +123,19 @@ def findTitles(rst_file: str) -> Generator[List[str], List[int], None]:
135
123
rst_file : str
136
124
RST file to scan through for headings
137
125
138
- Returns
126
+ Yields
139
127
-------
140
- title_list : List[ str]
141
- A list of heading strings found in the document tree
128
+ title : str
129
+ A heading found in the rst file
142
130
143
- line_number_list : List[ int]
144
- The corresponding line numbers of the headings in title_list
131
+ line_number : int
132
+ The corresponding line number of the heading
145
133
146
134
"""
147
135
148
- # title_list is the list of headings that is encountered in the doctree
149
- title_list : List [str ] = []
150
-
151
- # List of line numbers that corresponding headings in title_list can be found at
152
- line_number_list : List [int ] = []
153
-
154
- # Open and read the .rst file and store the string of data into lines
155
136
with open (rst_file , "r" ) as file_obj :
156
137
lines = file_obj .read ().split ("\n " )
157
138
158
- # Regular expressions that denote a title beforehand
159
139
regex = {
160
140
"*" : r"^(?:\*{1})*$" ,
161
141
"=" : r"^(?:={1})*$" ,
@@ -166,26 +146,20 @@ def findTitles(rst_file: str) -> Generator[List[str], List[int], None]:
166
146
'"' : r'^(?:"{1})*$' ,
167
147
}
168
148
169
- # '*`_' markers are removed from original string text.
170
149
table = str .maketrans ("" , "" , "*`_" )
171
150
172
- # Loop through lines lines, appending if they are considered headings
173
- for lineno in range (1 , len (lines )):
174
- if len (lines [lineno ]) != 0 and len (lines [lineno - 1 ]) != 0 :
151
+ for line_no in range (1 , len (lines )):
152
+ if len (lines [line_no ]) != 0 and len (lines [line_no - 1 ]) != 0 :
175
153
for key in regex :
176
- match = re .search (regex [key ], lines [lineno ])
154
+ match = re .search (regex [key ], lines [line_no ])
177
155
if match is not None :
178
- if lineno >= 2 :
179
- if lines [lineno ] == lines [lineno - 2 ]:
180
- if len (lines [lineno ]) == len (lines [lineno - 1 ]):
181
- title_list .append (lines [lineno - 1 ].translate (table ))
182
- line_number_list .append (lineno )
156
+ if line_no >= 2 :
157
+ if lines [line_no ] == lines [line_no - 2 ]:
158
+ if len (lines [line_no ]) == len (lines [line_no - 1 ]):
159
+ yield lines [line_no - 1 ].translate (table ), line_no
183
160
break
184
- if len (lines [lineno ]) >= len (lines [lineno - 1 ]):
185
- title_list .append (lines [lineno - 1 ].translate (table ))
186
- line_number_list .append (lineno )
187
-
188
- return title_list , line_number_list
161
+ if len (lines [line_no ]) >= len (lines [line_no - 1 ]):
162
+ yield lines [line_no - 1 ].translate (table ), line_no
189
163
190
164
191
165
def fill_bad_title_dict (rst_file : str ) -> None :
@@ -199,20 +173,15 @@ def fill_bad_title_dict(rst_file: str) -> None:
199
173
200
174
"""
201
175
202
- # Ensure this file doesn't already have a bad_title_dict slot
203
176
if rst_file in bad_title_dict :
204
177
return
205
178
206
- # Make a list of headings along with their line numbers
207
- title_list , line_number_list = findTitles (rst_file )
208
-
209
- # Append the bad_title_dict if the capitalization convention not followed
210
- for i in range (len (title_list )):
211
- if not is_following_capitalization_convention (title_list [i ]):
179
+ for title , line_number in find_titles (rst_file ):
180
+ if not is_following_capitalization_convention (title ):
212
181
if rst_file not in bad_title_dict :
213
- bad_title_dict [rst_file ] = [(title_list [ i ], line_number_list [ i ] )]
182
+ bad_title_dict [rst_file ] = [(title , line_number )]
214
183
else :
215
- bad_title_dict [rst_file ].append ((title_list [ i ], line_number_list [ i ] ))
184
+ bad_title_dict [rst_file ].append ((title , line_number ))
216
185
217
186
218
187
def find_rst_files (source_paths : List [str ]) -> Generator [str , None , None ]:
@@ -232,7 +201,6 @@ def find_rst_files(source_paths: List[str]) -> Generator[str, None, None]:
232
201
233
202
"""
234
203
235
- # Loop through source_paths, recursively looking for .rst files
236
204
for directory_address in source_paths :
237
205
if not os .path .exists (directory_address ):
238
206
raise ValueError (
@@ -241,7 +209,7 @@ def find_rst_files(source_paths: List[str]) -> Generator[str, None, None]:
241
209
elif directory_address .endswith (".rst" ):
242
210
yield directory_address
243
211
else :
244
- for (dirpath , dirnames , filenames ) in walk (directory_address ):
212
+ for (dirpath , _ , filenames ) in os . walk (directory_address ):
245
213
for file in filenames :
246
214
if file .endswith (".rst" ):
247
215
yield os .path .join (dirpath , file )
@@ -260,32 +228,28 @@ def main(source_paths: List[str], output_format: str) -> bool:
260
228
261
229
Returns
262
230
-------
263
- is_failed : bool
231
+ number_of_errors : int
264
232
Number of incorrectly capitalized headings that were printed
265
233
266
234
"""
267
235
268
- is_failed : bool = False
236
+ number_of_errors : int = 0
269
237
270
- # Make a list of all RST files from command line directory list
271
238
directory_list = find_rst_files (source_paths )
272
239
273
- # Fill the bad_title_dict, which contains all incorrectly capitalized headings
274
240
for filename in directory_list :
275
241
fill_bad_title_dict (filename )
276
242
277
- # Return an exit status of 0 if there are no bad titles in the dictionary
278
- if len (bad_title_dict ) == 0 :
279
- return is_failed
243
+ if (len (bad_title_dict ) == 0 ):
244
+ return number_of_errors
280
245
281
- # Print bad_title_dict Results
282
- is_failed = True
283
246
for key in bad_title_dict :
284
247
for line in bad_title_dict [key ]:
285
- print (key + ":" + str (line [1 ]) + ": " + err_msg + ' "' + line [0 ] + '"' )
248
+ correct_title = correct_title_capitalization (line [0 ])
249
+ print (f'{ key } :{ line [1 ]} :{ err_msg } "{ line [0 ]} " to "{ correct_title } "' )
250
+ number_of_errors += 1
286
251
287
- # Exit status of 0
288
- return is_failed
252
+ return number_of_errors
289
253
290
254
291
255
if __name__ == "__main__" :
@@ -298,7 +262,7 @@ def main(source_paths: List[str], output_format: str) -> bool:
298
262
parser .add_argument (
299
263
"--format" ,
300
264
"-f" ,
301
- default = "{source_path}:{line_number}:{heading }:{msg }" ,
265
+ default = "{source_path}:{line_number}:{msg }:{heading }" ,
302
266
help = "Output format of incorrectly capitalized titles" ,
303
267
)
304
268
0 commit comments