Skip to content

Commit fde67c2

Browse files
author
Mark R. Tuttle
committed
Add markdown preprocessing scripts for use before doxygen formatting
* Append "last modified" dates to the end of all markdown files. * Use a pandoc filter to rewrite links in cprover-manual (links originally intended to work with the cprover.org/cprover-manual javascripts). * Add a script to repair fenced code blocks written by pandoc
1 parent 354f8c9 commit fde67c2

File tree

5 files changed

+267
-0
lines changed

5 files changed

+267
-0
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!/usr/bin/env python3
2+
3+
import git
4+
import logging
5+
import subprocess
6+
import sys
7+
from pathlib import Path
8+
9+
def append_last_modified_date(repo, path):
    """Append a "Last modified" line to the end of a file.

    The date is whatever ``git log -1 --format=%ai`` reports for ``path``,
    run through ``repo.git``.  When git reports no date at all, the file
    is left untouched and a warning is logged.

    Args:
        repo: Object whose ``git.log(...)`` method runs ``git log`` (the
            caller in this file passes a ``git.Repo``).
        path: Path of the file to append to.
    """

    # Use the author date %ai as last modified date and not the commit date %ci
    # The author date is what 'git log' prints from the command line
    # The "--" separator keeps git from reading the path as a revision
    date = repo.git.log("-1", "--format=%ai", "--", str(path))
    if not date:
        # Nothing to report: appending an empty date would only add noise
        logging.warning("No last modified date found for %s", path)
        return

    with open(path, "a", encoding="utf-8") as handle:
        # append two newlines to guarantee a paragraph break in the
        # new markdown file in the event the file does not already end
        # with a newline
        print(f"\n\nLast modified: {date}", file=handle)
21+
22+
def append_last_modified_dates(paths):
    """Append a "Last modified" line to every file in ``paths``.

    The git repository is located by searching upward from the first
    path only; that one repository is then used for every file.

    Args:
        paths: Iterable of file paths; each is resolved to an absolute path.

    Raises:
        SystemExit: With status 1 when ``paths`` is empty or when the
            repository reports uncommitted changes.
    """
    paths = [Path(path).resolve() for path in paths]
    if not paths:
        # These are fatal conditions, so report them as errors
        logging.error("Failed to append last modified dates: list of files is empty")
        sys.exit(1)

    repo = git.Repo(paths[0], search_parent_directories=True)
    if repo.is_dirty():
        logging.error("Failed to append last modified dates: repository has uncommitted changes")
        sys.exit(1)

    for path in paths:
        append_last_modified_date(repo, path)
35+
36+
def main():
    """Enable INFO-level logging, then process the files named in sys.argv."""
    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
    file_arguments = sys.argv[1:]
    append_last_modified_dates(file_arguments)


if __name__ == "__main__":
    main()
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
import logging
4+
import subprocess
5+
import os
6+
import re
7+
8+
def create_parser(options=None, description=None):
    """Create a parser for command line arguments.

    Args:
        options: Optional list of dicts.  Each dict has a 'flag' key naming
            the argument; every other key is passed to
            ``ArgumentParser.add_argument`` as a keyword argument.  The
            list and its dicts are not modified.
        description: Optional description shown in the parser's help.

    Returns:
        An ``argparse.ArgumentParser`` with the requested arguments plus
        ``--verbose`` and ``--debug`` store_true flags, each added only
        when not already among the requested flags.
    """

    # Work on copies: the flags are appended to the list and 'flag' is
    # popped from each dict below, which must not leak back to the caller
    options = [dict(option) for option in (options or [])]
    description = description or ""

    flags = [option.get('flag') for option in options]
    if '--verbose' not in flags:
        options.append({'flag': '--verbose', 'action': 'store_true', 'help': 'Verbose output'})
    if '--debug' not in flags:
        options.append({'flag': '--debug', 'action': 'store_true', 'help': 'Debug output'})

    parser = argparse.ArgumentParser(description=description)
    for option in options:
        flag = option.pop('flag')
        parser.add_argument(flag, **option)
    return parser
25+
26+
def configure_logging(args):
    """Configure logging level based on command line arguments.

    ``args.debug`` selects DEBUG and takes precedence over ``args.verbose``,
    which selects INFO.  With neither set, no level is passed to
    ``logging.basicConfig``.
    """

    # Logging is configured by first invocation of basicConfig
    settings = {'format': '%(levelname)s: %(message)s'}
    if args.debug:
        settings['level'] = logging.DEBUG
    elif args.verbose:
        settings['level'] = logging.INFO
    logging.basicConfig(**settings)
38+
39+
def parse_arguments():
    """Parse the command line of this script.

    Returns:
        The namespace produced by ``parse_args()`` with ``pandoc_write``
        (default 'markdown_phpextra'), ``pandoc_wrap`` (default 'none'),
        ``file`` (zero or more markdown files), and the ``verbose`` and
        ``debug`` flags that ``create_parser`` adds.
    """

    options = [
        {'flag': '--pandoc-write',
         'default': 'markdown_phpextra',
         'help': 'pandoc --write option'},
        # The value is forwarded as pandoc's --wrap option
        {'flag': '--pandoc-wrap',
         'default': 'none',
         'help': 'pandoc --wrap option'},
        {'flag': 'file',
         'nargs': '*',
         'help': 'markdown files'},
    ]
    return create_parser(options, "Prepare markdown for doxygen").parse_args()
53+
54+
55+
def pandoc(path, pandoc_write, pandoc_wrap, pandoc_filter=None):
    """Run pandoc on a file and return its patched output lines.

    Args:
        path: File handed to pandoc as its input.
        pandoc_write: Value for pandoc's ``--write`` option.
        pandoc_wrap: Value for pandoc's ``--wrap`` option.
        pandoc_filter: Optional filter program for pandoc's ``--filter``
            option; it is converted to an absolute path first.

    Returns:
        The lines of pandoc's standard output, each passed through
        ``patch_code_block``.

    Raises:
        subprocess.CalledProcessError: When pandoc exits with a nonzero
            status (``check=True``).
    """
    command = ['pandoc', f'--write={pandoc_write}', f'--wrap={pandoc_wrap}']
    if pandoc_filter:
        # Absolute path, so the filter does not depend on the working directory
        command.append(f'--filter={os.path.realpath(pandoc_filter)}')
    command.append(str(path))

    lines = subprocess.run(command,
                           check=True,
                           text=True,
                           capture_output=True).stdout.splitlines()
    # NOTE(review): patch_code_block is not defined in the visible part of
    # this file -- confirm where it is meant to come from
    return [patch_code_block(line) for line in lines]
69+
70+
################################################################
71+
72+
def test_patch_code_block():
    """Check patch_code_block on fences with and without a language tag."""
    expectations = [
        ("``` c", "```c"),
        ("``` sh", "```sh"),
        ("~~~ c", "~~~c"),
        ("~~~ sh", "~~~sh"),
        ("```c", "```c"),
        ("``` ", "``` "),
    ]
    for given, wanted in expectations:
        assert patch_code_block(given) == wanted
79+
80+
def test_patch_link_target():
    """Check that each spelling of the helpful/cow target maps to helpful-cow.md."""
    targets = [
        "../../helpful/cow/",
        "helpful/cow",
        "helpful/cow/",
        "helpful-cow/",
    ]
    for target in targets:
        assert patch_link_target(target) == "helpful-cow.md"
85+
86+
def test_patch_link():
    """Check that patch_link rewrites the target of a single markdown link."""
    links = [
        "[a](../../helpful/cow/)",
        "[a](helpful/cow)",
        "[a](helpful/cow/)",
        "[a](helpful-cow/)",
    ]
    for link in links:
        assert patch_link(link) == "[a](helpful-cow.md)"
91+
92+
def test_patch_links():
    """Check that patch_links rewrites every link found in one line."""
    given = "a b [a](../../helpful/cow/) x [a](../../helpful/cow/)"
    wanted = "a b [a](helpful-cow.md) x [a](helpful-cow.md)"
    assert patch_links(given) == wanted
94+
95+
################################################################
96+
97+
def main():
    """Run pandoc over each file on the command line and print the patched lines."""
    args = parse_arguments()
    configure_logging(args)

    for path in args.file:
        converted = pandoc(path, args.pandoc_write, args.pandoc_wrap)
        # Patch every line before printing any of them
        patched = list(map(patch_links, converted))
        for text in patched:
            print(text)


if __name__ == "__main__":
    main()
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#!/bin/bash

# Prepare the markdown files below the current directory for doxygen:
# append last modified dates, then rewrite the links in cprover-manual.

set -euo pipefail

BINDIR=$(dirname "$(realpath "${BASH_SOURCE[0]}")")

# Private scratch directory for file lists and pandoc output, removed on exit
WORKDIR=$(mktemp -d)
trap 'rm -rf "$WORKDIR"' EXIT

# File names are written NUL-delimited and read back into an array so
# that paths containing whitespace are not split into several words.
# Writing the list to a file keeps a failing find fatal under 'set -e'.
find . -name '*.md' -print0 > "$WORKDIR/markdown-files"
FILES=()
while IFS= read -r -d '' file; do
    FILES+=("$file")
done < "$WORKDIR/markdown-files"

if [ "${#FILES[@]}" -eq 0 ]; then
    echo "No markdown files found" >&2
    exit 1
fi

# Append the last modified date to the end of every markdown file

echo
echo "Appending last modified dates to markdown files"
"$BINDIR/append-last-modified-dates.py" "${FILES[@]}"

# Doxygen parses incorrectly a link [what a link](what-a-link.md) that
# is broken over two lines.
# Doxygen requires that headings '# heading' have labels '{# heading}'
# for section linking to work.  The markdown extension "php Markdown
# Extra" supports section labels.
# Use pandoc to remove line breaks from paragraphs and to output a
# markdown extension with section labels.
# Note: Need to read markdown as markdown_phpextra and not default
# markdown to preserve doxygen pragmas like \ingroup.

# Bug: This is currently interacting badly with \dot in cprover markdown

# echo
# echo "Running pandoc over markdown files"
# for file in $FILES; do
#   echo $file
#   tmp=/tmp/${file%.*}1.md
#   mkdir -p $(dirname $tmp)
#   cp $file $tmp
#   pandoc --read=markdown_phpextra --write=markdown_phpextra --wrap=none $tmp | \
#     $BINDIR/pandoc-codeblock-repair.sh > $file
# done

# Use the first cprover-manual found below the current directory
find . -name cprover-manual -print0 > "$WORKDIR/cprover-dirs"
CPROVERS=()
while IFS= read -r -d '' dir; do
    CPROVERS+=("$dir")
done < "$WORKDIR/cprover-dirs"

# Stop here: with an empty starting point the find below would search
# the current directory and every markdown file would be rewritten.
if [ "${#CPROVERS[@]}" -eq 0 ]; then
    echo "No cprover-manual directory found" >&2
    exit 1
fi
cprover=${CPROVERS[0]}

# Markdown files in cprover-manual have hierarchical links like
# ../../pretty/cow/ that refer to the markdown file pretty-cow.md.
# The site http://www.cprover.org/cprover-manual/ uses a javascript
# script running in the browser to serve up pages from the
# cprover-manual directory.  Use a pandoc filter to patch up the
# cprover-manual links before running doxygen.

echo
echo "Running pandoc filter over cprover-manual markdown files"
find "$cprover" -name '*.md' -print0 > "$WORKDIR/manual-files"
# The list is read on descriptor 3 so the commands in the loop body
# cannot consume it through standard input.
while IFS= read -r -d '' file <&3; do
    echo "$file"
    # Write to a scratch file first: redirecting straight onto "$file"
    # would truncate it before pandoc has read it, and would leave it
    # damaged if pandoc failed.
    pandoc --write=markdown_phpextra --wrap=none --filter="$BINDIR/pandoc-cprover-link-filter.py" "$file" |
        "$BINDIR/pandoc-codeblock-repair.sh" > "$WORKDIR/output.md"
    cat "$WORKDIR/output.md" > "$file"
done 3< "$WORKDIR/manual-files"
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/bin/sh

# Filter from standard input to standard output that removes the spaces
# following a code fence (``` or ~~~) at the start of a line, which is
# where the syntax highlighting language of a markdown code block goes.

# Pandoc outputs ``` c and ~~~ c but doxygen expects ```c and ~~~c.
# Pandoc outputs leading spaces before ``` and ~~~ when the code block is
# part of a list item, so spaces in front of the fence are kept.

sed -e 's/^\( *```\) */\1/' -e 's/^\( *~~~\) */\1/'
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#!/usr/bin/env python3
2+
3+
# https://pandoc.org/MANUAL.html
4+
# https://pandoc.org/filters.html
5+
# AST: https://hackage.haskell.org/package/pandoc-types-1.22.2.1/docs/Text-Pandoc-Definition.html
6+
7+
from pandocfilters import toJSONFilter, Link
8+
9+
def patch_url(url):
    """Rewrite a cprover-manual link target for the flat markdown layout.

    Args:
        url: Link target as written in the markdown source.

    Returns:
        * the remainder of the url when it starts with the raw GitHub
          prefix of the cprover-manual directory;
        * the url unchanged when it carries a scheme (http, https,
          mailto, ...);
        * otherwise the path components joined with '-' plus '.md',
          followed by '#label' when the url has a non-empty fragment;
        * '.' when nothing is left after flattening.
    """
    from urllib.parse import urlsplit

    # cbmc-tutorial.md links directly to source files used in examples; link to local copies instead
    raw_url = 'https://raw.githubusercontent.com/diffblue/cbmc/develop/doc/cprover-manual/'
    if url.startswith(raw_url):
        return url[len(raw_url):]

    if url.startswith(('http://', 'https://')):
        return url

    # Any other scheme (mailto:, ftp:, ...) also points outside the manual
    # and must not be turned into a markdown file name
    try:
        scheme = urlsplit(url).scheme
    except ValueError:
        # Not parseable as a url: treat it as a relative path as before
        scheme = ''
    if scheme:
        return url

    # Split off the fragment at the last '#', if there is one
    if '#' in url:
        path, label = url.rsplit('#', 1)
    else:
        path, label = url, ''

    # Flatten hierarchical urls in cprover-manual to a flat set of markdown files
    # Map a url like ../../helpful/cow/ to helpful-cow.md
    # Map a url like . or .. to . (not index.md since index.md is doxygen mainpage)

    parts = [part for part in path.split('/') if part not in ('', '.', '..')]
    new_path = '-'.join(parts) + '.md' if parts else ''

    new_url = f'{new_path}#{label}' if label else new_path
    return new_url or '.'
35+
36+
def test_patch_url():
    """Check that each spelling of the helpful/cow url maps to helpful-cow.md."""
    spellings = [
        "../../helpful/cow/",
        "helpful/cow",
        "helpful/cow/",
        "helpful-cow/",
    ]
    for spelling in spellings:
        assert patch_url(spelling) == "helpful-cow.md"
41+
42+
def patch_link(key, value, _format, _meta):
    """Filter action: rebuild each Link element with its url patched.

    Elements whose key is not 'Link' yield None.  For a Link, ``value``
    is unpacked as ``(attr, alt_text, (url, title))`` and a new Link is
    returned that differs only in the url, which goes through patch_url.
    """
    if key != 'Link':
        return None

    attr, alt_text, (url, title) = value
    patched_target = [patch_url(url), title]
    return Link(attr, alt_text, patched_target)


if __name__ == "__main__":
    toJSONFilter(patch_link)

0 commit comments

Comments
 (0)