Skip to content

Commit 0e344ad

Browse files
author
awu42
committed
Testing script on doc/source/development/contributing.rst (pandas-dev#26941)
1 parent 60d8db9 commit 0e344ad

File tree

1 file changed

+64
-37
lines changed

1 file changed

+64
-37
lines changed

scripts/validate_rst_title_capitalization.py

+64-37
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,34 @@
2424

2525
# Keynames that would not follow capitalization convention
2626
CAPITALIZATION_EXCEPTIONS = {
27-
'pandas', 'Python', 'IPython', 'PyTables', 'Excel', 'JSON',
28-
'HTML', 'SAS', 'SQL', 'BigQuery', 'STATA', 'Interval', 'PEP8',
29-
'Period', 'Series', 'Index', 'DataFrame', 'C', 'Git', 'GitHub', 'NumPy',
30-
'Apache', 'Arrow', 'Parquet', 'MultiIndex', 'NumFOCUS', 'sklearn-pandas'
27+
"pandas",
28+
"Python",
29+
"IPython",
30+
"PyTables",
31+
"Excel",
32+
"JSON",
33+
"HTML",
34+
"SAS",
35+
"SQL",
36+
"BigQuery",
37+
"STATA",
38+
"Interval",
39+
"PEP8",
40+
"Period",
41+
"Series",
42+
"Index",
43+
"DataFrame",
44+
"C",
45+
"Git",
46+
"GitHub",
47+
"NumPy",
48+
"Apache",
49+
"Arrow",
50+
"Parquet",
51+
"MultiIndex",
52+
"NumFOCUS",
53+
"sklearn",
54+
"Docker",
3155
}
3256

3357
# Lowercase representation of CAPITALIZATION_EXCEPTIONS
@@ -38,11 +62,11 @@
3862
bad_title_dict = {}
3963

4064
# Error Message:
41-
err_msg = 'Heading capitalization formatted incorrectly. Please correctly capitalize'
65+
err_msg = "Heading capitalization formatted incorrectly. Please correctly capitalize"
4266

4367

4468
def follow_capitalization_convention(title: str) -> bool:
45-
'''
69+
"""
4670
Algorithm to determine if a heading follows the capitalization convention
4771
4872
This method returns true if the title follows the convention
@@ -58,13 +82,13 @@ def follow_capitalization_convention(title: str) -> bool:
5882
bool
5983
True if capitalization is correct, False if not
6084
61-
'''
85+
"""
6286

6387
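# Split on comma, semicolon, whitespace, parentheses, colon, slash and hyphen.
# NOTE(review): inside the character class, `,-/` is a range (',' '-' '.' '/'), so '.' is also treated as a delimiter; escape the hyphen or place it last if that is not intended.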
64-
word_list = re.split(r'[;,/():\s]\s*', title)
88+
word_list = re.split(r"[;,-/():\s]\s*", title)
6589

6690
# Edge Case: First word is an empty string
67-
if (len(word_list[0]) == 0):
91+
if len(word_list[0]) == 0:
6892
return False
6993

7094
# Dealing with the first word of the title
@@ -73,7 +97,7 @@ def follow_capitalization_convention(title: str) -> bool:
7397
if word_list[0].lower() in CAPITALIZATION_EXCEPTIONS_LOWER:
7498
return False
7599
# First letter of first word must be uppercase
76-
if (not word_list[0][0].isupper()):
100+
if not word_list[0][0].isupper():
77101
return False
78102
# Remaining letters of first word must not be uppercase
79103
for j in range(1, len(word_list[0])):
@@ -96,7 +120,7 @@ def follow_capitalization_convention(title: str) -> bool:
96120

97121

98122
def findTitles(rst_file: str) -> Generator[List[str], List[int], None]:
99-
'''
123+
"""
100124
Algorithm to identify particular text that should be considered headings in an
101125
RST file
102126
@@ -116,7 +140,7 @@ def findTitles(rst_file: str) -> Generator[List[str], List[int], None]:
116140
line_number_list : List[int]
117141
The corresponding line numbers of the headings in title_list
118142
119-
'''
143+
"""
120144

121145
# title_list is the list of headings that is encountered in the doctree
122146
title_list: List[str] = []
@@ -126,47 +150,52 @@ def findTitles(rst_file: str) -> Generator[List[str], List[int], None]:
126150

127151
# Open and read the .rst file and store its contents in input as a list of lines
128152
f = open(rst_file, "r")
129-
input = f.read().split('\n')
153+
input = f.read().split("\n")
154+
f.close()
130155

131156
# Regular expressions that denote a title beforehand
132157
regex = {
133-
'*': r'^(?:\*{1})*$', '=': r'^(?:={1})*$', '-': r'^(?:-{1})*$',
134-
'^': r'^(?:\^{1})*$', '~': r'^(?:~{1})*$', '#': r'^(?:#{1})*$',
135-
'"': r'^(?:"{1})*$'
158+
"*": r"^(?:\*{1})*$",
159+
"=": r"^(?:={1})*$",
160+
"-": r"^(?:-{1})*$",
161+
"^": r"^(?:\^{1})*$",
162+
"~": r"^(?:~{1})*$",
163+
"#": r"^(?:#{1})*$",
164+
'"': r'^(?:"{1})*$',
136165
}
137166

138167
# '*`_' markers are removed from original string text.
139-
table = str.maketrans("", "", '*`_')
168+
table = str.maketrans("", "", "*`_")
140169

141170
# Loop through input lines, appending if they are considered headings
142171
for lineno in range(1, len(input)):
143-
if (len(input[lineno]) != 0 and len(input[lineno - 1]) != 0):
172+
if len(input[lineno]) != 0 and len(input[lineno - 1]) != 0:
144173
for key in regex:
145174
match = re.search(regex[key], input[lineno])
146-
if (match is not None):
147-
if (lineno >= 2):
148-
if (input[lineno] == input[lineno - 2]):
149-
if (len(input[lineno]) == len(input[lineno - 1])):
175+
if match is not None:
176+
if lineno >= 2:
177+
if input[lineno] == input[lineno - 2]:
178+
if len(input[lineno]) == len(input[lineno - 1]):
150179
title_list.append(input[lineno - 1].translate(table))
151180
line_number_list.append(lineno)
152181
break
153-
if (len(input[lineno]) >= len(input[lineno - 1])):
182+
if len(input[lineno]) >= len(input[lineno - 1]):
154183
title_list.append(input[lineno - 1].translate(table))
155184
line_number_list.append(lineno)
156185

157186
return title_list, line_number_list
158187

159188

160189
def fill_bad_title_dict(rst_file: str) -> None:
161-
'''
190+
"""
162191
Method that fills up the bad_title_dict with incorrectly capitalized headings
163192
164193
Parameters
165194
----------
166195
rst_file : str
167196
Directory address of a .rst file as a string
168197
169-
'''
198+
"""
170199

171200
# Ensure this file doesn't already have a bad_title_dict slot
172201
if rst_file in bad_title_dict:
@@ -185,7 +214,7 @@ def fill_bad_title_dict(rst_file: str) -> None:
185214

186215

187216
def find_rst_files(source_paths: List[str]) -> List[str]:
188-
'''
217+
"""
189218
Given the command line arguments of directory paths, this method
190219
yields the strings of the .rst file directories that these paths contain
191220
@@ -199,15 +228,15 @@ def find_rst_files(source_paths: List[str]) -> List[str]:
199228
directory_address : str
200229
Directory address of a .rst files found in command line argument directories
201230
202-
'''
231+
"""
203232

204233
# Loop through source_paths, recursively looking for .rst files
205234
for directory_address in source_paths:
206235
if not os.path.exists(directory_address):
207236
raise ValueError(
208237
"Please enter a valid path, pointing to a valid file/directory."
209238
)
210-
elif (directory_address.endswith(".rst")):
239+
elif directory_address.endswith(".rst"):
211240
yield directory_address
212241
else:
213242
for (dirpath, dirnames, filenames) in walk(directory_address):
@@ -217,7 +246,7 @@ def find_rst_files(source_paths: List[str]) -> List[str]:
217246

218247

219248
def main(source_paths: List[str], output_format: str) -> bool:
220-
'''
249+
"""
221250
The main method to print all headings with incorrect capitalization
222251
223252
Parameters
@@ -232,9 +261,9 @@ def main(source_paths: List[str], output_format: str) -> bool:
232261
is_failed : bool
233262
True if there are headings that are printed, False if not
234263
235-
'''
264+
"""
236265

237-
is_failed : bool = False
266+
is_failed: bool = False
238267

239268
# Make a list of all RST files from command line directory list
240269
directory_list = find_rst_files(source_paths)
@@ -245,23 +274,21 @@ def main(source_paths: List[str], output_format: str) -> bool:
245274
fill_bad_title_dict(filename)
246275

247276
# Return an exit status of 0 if there are no bad titles in the dictionary
248-
if (len(bad_title_dict) == 0):
277+
if len(bad_title_dict) == 0:
249278
return is_failed
250279

251280
# Print bad_title_dict Results
252-
print()
281+
is_failed = True
253282
for key in bad_title_dict:
254283
for line in bad_title_dict[key]:
255-
print(
256-
key + ":" + str(line[1]) + ": " + err_msg + " \"" + line[0] + "\""
257-
)
284+
print(key + ":" + str(line[1]) + ": " + err_msg + ' "' + line[0] + '"')
258285

259286
# Return True (failure): at least one incorrectly capitalized heading was printed
260287
return is_failed
261288

262289

263290
if __name__ == "__main__":
264-
parser = argparse.ArgumentParser(description='Validate heading capitalization')
291+
parser = argparse.ArgumentParser(description="Validate heading capitalization")
265292

266293
parser.add_argument(
267294
"paths", nargs="+", default=".", help="Source paths of file/directory to check."

0 commit comments

Comments
 (0)