added line number to validation script

awu42 · awu42 · commit 50661c3af10f · 2020-01-17T08:36:07.000-05:00
diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py
@@ -1,11 +1,13 @@
+#!/usr/bin/env python
+
 """Python script for collecting the titles in the rst files and validating
 if they follow the capitalization convention.  Prints the titles that do not
 follow the convention. Particularly used for .rst files in the doc/source folder
 
 NOTE: Run from the root directory of pandas repository
 
 Example:
-python ./scripts/validate_rst_title_capitalization.py doc/source/development/contributing.rst
+./scripts/validate_rst_title_capitalization.py doc/source/development/contributing.rst
 
 Files that cannot be validated: (code crashes when validating for some reason)
 doc/source/user_guide/io.rst
@@ -24,6 +26,43 @@
 import os
 from os import walk
 
+class suppress_stdout_stderr(object):
+    '''
+    Code source:
+    https://stackoverflow.com/questions/11130156/suppress-stdout-stderr-print-from-python-functions
+
+    A context manager for doing a "deep suppression" of stdout and stderr in
+    Python, i.e. will suppress all print, even if the print originates in a
+    compiled C/Fortran sub-function. Raised exceptions still propagate.
+
+    This code is needed to suppress output from the parser method
+    because the parser method prints to stdout when encountering Sphinx
+    references, as it cannot parse those at this moment.
+
+    Descriptors are opened in __enter__ and closed in __exit__, so an
+    instance that is never entered leaks nothing and can be reused.
+    '''
+    def __enter__(self):
+        # Flush Python-level buffers so earlier output is not swallowed.
+        sys.stdout.flush()
+        sys.stderr.flush()
+        # Null sinks, plus duplicates of the real stdout (1) and stderr (2).
+        self.null_fds = [os.open(os.devnull, os.O_RDWR) for _ in range(2)]
+        self.save_fds = [os.dup(1), os.dup(2)]
+        os.dup2(self.null_fds[0], 1)
+        os.dup2(self.null_fds[1], 2)
+        return self
+
+    def __exit__(self, *_):
+        # Flush while still redirected so buffered prints stay suppressed.
+        sys.stdout.flush()
+        sys.stderr.flush()
+        os.dup2(self.save_fds[0], 1)
+        os.dup2(self.save_fds[1], 2)
+        for fd in self.null_fds + self.save_fds:
+            os.close(fd)
+
+
 # Keynames that would not follow capitalization convention
 CAPITALIZATION_EXCEPTIONS = {
     'pandas', 'Python', 'IPython','PyTables', 'Excel', 'JSON',
@@ -35,11 +74,22 @@
 # Dictionary of bad titles that will be printed later
 badTitleDictionary = {}
 
+# List of problematic tags that are exceptions to parent rule
+listOfMarkers = {'emphasis', 'strong', 'reference', 'literal'}
+
 # List of files that, when validated, causes the program to crash
 cannotValidate = ['doc/source/user_guide/io.rst', 'doc/source/whatsnew/v0.17.1.rst']
 
-# Method returns true or false depending on whether title follows convention
-def followCapitalizationConvention(title):
+# Error Message:
+errMessage = "Title capitalization formatted incorrectly. Manually format correctly"
+
+
+def followCapitalizationConvention(title: str) -> bool:
+    '''
+    Method returns true or false depending on whether a title follows
+    the capitalization convention
+
+    '''
 
     # Lowercase representation of keynames
     keyNamesLower = {'pandas'}
@@ -80,8 +130,24 @@ def followCapitalizationConvention(title):
 
     return True
 
-# Method prints all of the bad titles
-def printBadTitles(rstFile):
+def findLineNumber(node: docutils.nodes.Node) -> int:
+    '''
+    Return ``line - 1`` of the nearest node (``node`` or an ancestor) whose
+    ``line`` is set; return 1 when the document root (or no parent) is reached
+    '''
+    # Walk up iteratively; a node without a line falls back to its parent.
+    while node is not None and node.tagname != 'document':
+        if node.line is not None:
+            return node.line - 1
+        node = node.parent
+    return 1
+
+def fillBadTitleDictionary(rstFile: str) -> None:
+    '''
+    Method that records every bad title of rstFile in badTitleDictionary
+    Entry: (title text, line number of bad title)
+
+    '''
     # Ensure file isn't one that causes the code to crash
     if rstFile in cannotValidate:
         return
@@ -99,15 +165,17 @@ def printBadTitles(rstFile):
         components=(docutils.parsers.rst.Parser,)
         ).get_default_values()
     document = docutils.utils.new_document('Document', settings)
-    parser.parse(input, document)
 
-    # print list of all the subtitles/headings that we want.
-    # Note: allParentTagsOfText = {'problematic', 'title', 'emphasis', 'inline', 'strong', 'literal', 'literal_block', 'title_reference', 'reference', 'paragraph'}
-    listOfMarkers = {'emphasis', 'strong', 'reference', 'literal'}
+    with suppress_stdout_stderr():
+        parser.parse(input, document)
+
+
+    # Fill up the titleList with lines that follow the title pattern
     myText = ""
     markerGrandparent = ""
     beforeMarker = False
     titleList = []
+    lineNumberList = []
     for node in document.traverse(nodes.Text):
         if (node.parent.tagname == 'title'):
             if (beforeMarker and markerGrandparent == node.parent):
@@ -116,28 +184,42 @@ def printBadTitles(rstFile):
             else:
                 if (myText != ""):
                     titleList.append(myText)
+                    lineNumberList.append(lineno)
+                lineno = findLineNumber(node)
                 myText = node.astext()
                 beforeMarker = False
         elif (node.parent.parent.tagname == 'title' and
             node.parent.tagname in listOfMarkers):
+            lineno = findLineNumber(node)
             myText = myText + node.astext()
             beforeMarker = True
             markerGrandparent = node.parent.parent
         else:
             beforeMarker = False
             if (myText != ""):
                 titleList.append(myText)
+                lineNumberList.append(lineno)
                 myText = ""
+                lineno = 0
 
     if (myText != ""):
         titleList.append(myText)
+        lineNumberList.append(lineno)
+
 
-    for text in titleList:
-        if not followCapitalizationConvention(text):
-            badTitleDictionary[rstFile].append(text)
+    # For each line in the titleList, append the badTitleDictionary if
+    # the capitalization convention is not followed
+    for i in range(len(titleList)):
+        if not followCapitalizationConvention(titleList[i]):
+            badTitleDictionary[rstFile].append((titleList[i], lineNumberList[i]))
 
-# Method finds all the bad titles, runs printBadTitles
-def findBadTitles(directoryAddress):
+
+def findBadTitles(directoryAddress: str) -> None:
+
+    '''
+    Method finds all the bad titles, runs fillBadTitleDictionary
+
+    '''
     f = []
     if (directoryAddress.endswith(".rst")):
         f.append(directoryAddress)
@@ -148,19 +230,25 @@ def findBadTitles(directoryAddress):
                     f.append(os.path.join(dirpath, file))
 
     for filename in f:
-        printBadTitles(filename)
+        fillBadTitleDictionary(filename)
 
 # Main Method
 if __name__ == "__main__":
     for i in range(1, len(sys.argv)):
         findBadTitles(sys.argv[i])
 
-    print("\n \nBAD TITLES \n \n")
+    print("BAD TITLES \n \n")
 
     # Print badTitleDictionary Results
+    printed = False
     for key in badTitleDictionary:
         if (len(badTitleDictionary[key]) != 0):
+            printed = True
             print(key)
             for titles in badTitleDictionary[key]:
                 print(titles)
             print()
+
+    # Exit code of 1 if there were bad titles
+    if (printed):
+        sys.exit(1)