Skip to content

Commit c4ff8bd

Browse files
author
awu42
committed
Testing validate_rst_title_capitalization.py script (pandas-dev#26941)
1 parent 635163d commit c4ff8bd

File tree

2 files changed

+42
-34
lines changed

2 files changed

+42
-34
lines changed

ci/code_checks.sh

+5-6
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
318318
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA02,SA03,SA05
319319
RET=$(($RET + $?)) ; echo $MSG "DONE"
320320

321+
### VALIDATE TITLE CAPITALIZATION CONVENTION ###
322+
MSG='Validate correct capitalization among titles in documentation' ; echo $MSG
323+
$BASE_DIR/scripts/validate_rst_title_capitalization.py $BASE_DIR/doc/source/development/contributing.rst
324+
RET=$(($RET + $?)) ; echo $MSG "DONE"
325+
321326
fi
322327

323328
### DEPENDENCIES ###
@@ -340,10 +345,4 @@ if [[ -z "$CHECK" || "$CHECK" == "typing" ]]; then
340345
RET=$(($RET + $?)) ; echo $MSG "DONE"
341346
fi
342347

343-
### VALIDATE TITLE CAPITALIZATION CONVENTION (Do I need an IF statement?)###
344-
MSG='Validate correct capitalization among titles in documentation' ; echo $MSG
345-
$BASE_DIR/scripts/validate_rst_title_capitalization.py $BASE_DIR/doc/source/development/contributing.rst
346-
$BASE_DIR/scripts/validate_rst_title_capitalization.py $BASE_DIR/doc/source/index.rst $BASE_DIR/doc/source/ecosystem.rst
347-
RET=$(($RET + $?)) ; echo $MSG "DONE"
348-
349348
exit $RET

scripts/validate_rst_title_capitalization.py

+37-28
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
1212
Examples:
1313
./scripts/validate_rst_title_capitalization.py doc/source/development/contributing.rst
14-
./scripts/validate_rst_title_capitalization.py doc/source/index.rst doc/source/ecosystem.rst
1514
./scripts/validate_rst_title_capitalization.py doc/source/
1615
1716
Files that cannot be validated (the script crashes while validating them; cause not yet determined):
@@ -33,7 +32,8 @@
3332
from os import walk
3433
from typing import Generator, List, Tuple
3534

36-
class suppress_stdout_stderr(object):
35+
36+
class suppress_stdout_stderr:
3737
'''
3838
Code source:
3939
https://stackoverflow.com/questions/11130156/suppress-stdout-stderr-print-from-python-functions
@@ -52,27 +52,27 @@ class suppress_stdout_stderr(object):
5252
'''
5353
def __init__(self):
5454
# Open a pair of null files
55-
self.null_fds = [os.open(os.devnull,os.O_RDWR) for x in range(2)]
55+
self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)]
5656
# Save the actual stdout (1) and stderr (2) file descriptors.
5757
self.save_fds = [os.dup(1), os.dup(2)]
5858

5959
def __enter__(self):
6060
# Assign the null pointers to stdout and stderr.
61-
os.dup2(self.null_fds[0],1)
62-
os.dup2(self.null_fds[1],2)
61+
os.dup2(self.null_fds[0], 1)
62+
os.dup2(self.null_fds[1], 2)
6363

6464
def __exit__(self, *_):
6565
# Re-assign the real stdout/stderr back to (1) and (2)
66-
os.dup2(self.save_fds[0],1)
67-
os.dup2(self.save_fds[1],2)
66+
os.dup2(self.save_fds[0], 1)
67+
os.dup2(self.save_fds[1], 2)
6868
# Close all file descriptors
6969
for fd in self.null_fds + self.save_fds:
7070
os.close(fd)
7171

7272

7373
# Keynames that would not follow capitalization convention
7474
CAPITALIZATION_EXCEPTIONS = {
75-
'pandas', 'Python', 'IPython','PyTables', 'Excel', 'JSON',
75+
'pandas', 'Python', 'IPython', 'PyTables', 'Excel', 'JSON',
7676
'HTML', 'SAS', 'SQL', 'BigQuery', 'STATA', 'Interval', 'PEP8',
7777
'Period', 'Series', 'Index', 'DataFrame', 'C', 'Git', 'GitHub', 'NumPy',
7878
'Apache', 'Arrow', 'Parquet', 'Triage', 'MultiIndex', 'NumFOCUS', 'sklearn-pandas'
@@ -92,7 +92,8 @@ def __exit__(self, *_):
9292
cannotValidate = ['doc/source/user_guide/io.rst', 'doc/source/whatsnew/v0.17.1.rst']
9393

9494
# Error Message:
95-
errMessage = "Heading capitalization formatted incorrectly. Please correctly capitalize"
95+
errMessage = 'Heading capitalization formatted incorrectly. Please correctly capitalize'
96+
9697

9798
def followCapitalizationConvention(title: str) -> bool:
9899
'''
@@ -104,7 +105,7 @@ def followCapitalizationConvention(title: str) -> bool:
104105
'''
105106

106107
# split with delimiters comma, semicolon and space, parentheses, colon, slashes
107-
wordList = re.split(r'[;,/():\s]\s*', title) # followed by any amount of extra whitespace.
108+
wordList = re.split(r'[;,/():\s]\s*', title)
108109

109110
# Edge Case: First word is an empty string
110111
if (len(wordList[0]) == 0):
@@ -137,6 +138,7 @@ def followCapitalizationConvention(title: str) -> bool:
137138
# Returning True if the heading follows the capitalization convention
138139
return True
139140

141+
140142
def findLineNumber(node: docutils.nodes) -> int:
141143
'''
142144
Recursive method that finds the line number in a document for a particular node
@@ -149,11 +151,12 @@ def findLineNumber(node: docutils.nodes) -> int:
149151
'''
150152
if (node.tagname == 'document'):
151153
return 1
152-
elif (node.line == None):
154+
elif (node.line is None):
153155
return findLineNumber(node.parent)
154156
else:
155157
return node.line - 1
156158

159+
157160
def parseRST(rstFile: str) -> docutils.nodes.document:
158161
'''
159162
Method to parse through an rstFile and return a document tree
@@ -169,19 +172,21 @@ def parseRST(rstFile: str) -> docutils.nodes.document:
169172
# Set up default settings for the document tree
170173
settings = docutils.frontend.OptionParser(
171174
components=(docutils.parsers.rst.Parser,)
172-
).get_default_values()
175+
).get_default_values()
173176

174177
# Initialize an empty document tree with the default settings from above
175178
document = docutils.utils.new_document('Document', settings)
176179

177-
# Parse the input string into an RST document tree, suppressing any stdout from the parse method
180+
# Parse input into an RST doctree, suppressing any stdout from parse method
178181
with suppress_stdout_stderr():
179182
parser.parse(input, document)
180183

181184
# Return the root node of the document tree
182185
return document
183186

184-
def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[str], List[int], None]:
187+
188+
def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[
189+
List[str], List[int], None]:
185190
'''
186191
Algorithm to identify particular text nodes as headings
187192
along with the text node's line number.
@@ -192,9 +197,9 @@ def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[
192197
193198
However, the problem occurs when we encounter text that has been either
194199
italicized, bolded, referenced, etc. In these situations, the tagname of
195-
the parent node could be one of the following: 'emphasis', 'strong', 'reference', 'literal',
196-
stored in the 'listOfMarkers' set variable. In this situation, the node's
197-
grandparent would have the 'title' tagname instead.
200+
the parent node could be one of the following: 'emphasis', 'strong',
201+
'reference', and 'literal', stored in the 'listOfMarkers' set variable. In
202+
this situation, the node's grandparent would have the 'title' tagname instead.
198203
199204
Let's see an example that can cause a problem. The heading provided will be
200205
'Looking at *pandas* docs' versus 'Looking at pandas docs'. In this example,
@@ -203,7 +208,7 @@ def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[
203208
204209
'Looking at *pandas* docs' 'Looking at pandas docs'
205210
title title
206-
/ | \ |
211+
/ | | |
207212
#text emphasis #text VS #text
208213
| | | |
209214
'Looking at' #text 'docs' 'Looking at pandas docs'
@@ -225,7 +230,7 @@ def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[
225230
226231
'''
227232

228-
# Initialize an empty string. myText will be used to construct headings and append into titleList
233+
# myText will be used to construct headings and append into titleList
229234
myText: str = ""
230235

231236
# A docutils.nodes object that stores a listOfMarkers text's grandparent node,
@@ -239,7 +244,7 @@ def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[
239244
# titleList is the list of headings that is encountered in the doctree
240245
titleList: List[str] = []
241246

242-
# A list of line numbers that the corresponding headings in titleList can be found at
247+
# List of line numbers that corresponding headings in titleList can be found at
243248
lineNumberList: List[int] = []
244249

245250
# Traverse through the nodes.Text in the document tree to construct headings
@@ -258,12 +263,12 @@ def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[
258263
beforeMarker = False
259264
# Case 2: Encounter a node with parent tagname in listOfMarkers
260265
elif (node.parent.parent.tagname == 'title' and
261-
node.parent.tagname in listOfMarkers):
266+
node.parent.tagname in listOfMarkers):
262267
lineno = findLineNumber(node)
263268
myText = myText + node.astext()
264269
beforeMarker = True
265270
markerGrandparent = node.parent.parent
266-
# Case 3: Encounter a node with parent tagname from none of the above (Ex. 'paragraph' tagname)
271+
# Case 3: Parent tagname is none of the above (e.g. 'paragraph')
267272
else:
268273
beforeMarker = False
269274
if (myText != ""):
@@ -272,14 +277,15 @@ def findBadTitlesInDoctree(document: docutils.nodes.document) -> Generator[List[
272277
myText = ""
273278
lineno = 0
274279

275-
# Sometimes, there is leftover string that hasn't been appended yet due to how the for loop works
280+
# Leftover string that hasn't been appended yet due to how the for loop works
276281
if (myText != ""):
277282
titleList.append(myText)
278283
lineNumberList.append(lineno)
279284

280285
# Return a list of the headings and a list of their corresponding line numbers
281286
return titleList, lineNumberList
282287

288+
283289
def fillBadTitleDictionary(rstFile: str) -> None:
284290
'''
285291
Method that prints all of the bad titles
@@ -298,10 +304,10 @@ def fillBadTitleDictionary(rstFile: str) -> None:
298304
# Parse rstFile with an RST parser
299305
document = parseRST(rstFile)
300306

301-
# Produce a list of headings along with their line numbers from the root document node
307+
# Make a list of headings along with their line numbers from document tree
302308
titleList, lineNumberList = findBadTitlesInDoctree(document)
303309

304-
# Append the badTitleDictionary if the capitalization convention for a heading is not followed
310+
# Add to badTitleDictionary if the capitalization convention is not followed
305311
for i in range(len(titleList)):
306312
if not followCapitalizationConvention(titleList[i]):
307313
if rstFile not in badTitleDictionary:
@@ -319,7 +325,7 @@ def createRSTDirectoryList(source_paths: List[str]) -> List[str]:
319325
# List of .rst file paths
320326
f = []
321327

322-
# Loop through source_paths. If address is a folder, recursively look through the folder for .rst files
328+
# Loop through source_paths, recursively looking for .rst files
323329
for directoryAddress in source_paths:
324330
if not os.path.exists(directoryAddress):
325331
raise ValueError(
@@ -336,6 +342,7 @@ def createRSTDirectoryList(source_paths: List[str]) -> List[str]:
336342
# Return the filled up list of .rst file paths
337343
return f
338344

345+
339346
def main(source_paths: List[str], output_format: str) -> bool:
340347
'''
341348
The main method to execute all commands
@@ -357,14 +364,16 @@ def main(source_paths: List[str], output_format: str) -> bool:
357364
print()
358365
for key in badTitleDictionary:
359366
for titles in badTitleDictionary[key]:
360-
print(key + ":" + str(titles[1]) + ": " + errMessage + " \"" + titles[0] + "\"")
367+
print(key + ":" + str(titles[1]) + ": " + errMessage
368+
+ " \"" + titles[0] + "\""
369+
)
361370

362371
# Exit status of 1
363372
return True
364373

365374

366375
if __name__ == "__main__":
367-
parser = argparse.ArgumentParser(description = 'Validate capitalization for document headings')
376+
parser = argparse.ArgumentParser(description='Validate heading capitalization')
368377

369378
parser.add_argument(
370379
"paths", nargs="+", default=".", help="Source paths of file/directory to check."

0 commit comments

Comments
 (0)