diff --git a/tests/fuzz/README.md b/tests/fuzz/README.md
new file mode 100644
index 00000000..87075a70
--- /dev/null
+++ b/tests/fuzz/README.md
@@ -0,0 +1,41 @@
+# OSS-Fuzz integration
+
+In principle, core Markdown parsing is designed to never except/crash on any input,
+and so [fuzzing](https://en.wikipedia.org/wiki/Fuzzing) can be used to test this conformance.
+This folder contains fuzzers which are principally run downstream as part of the <https://github.com/google/oss-fuzz> infrastructure.
+
+Any file that matches `fuzz_*.py` in this repository will be built and run on OSS-Fuzz
+(see <https://github.com/google/oss-fuzz/blob/master/projects/markdown-it-py/build.sh>).
+
+See <https://google.github.io/oss-fuzz/getting-started/new-project-guide/python-lang/> for full details.
+
+## CI integration
+
+Fuzzing essentially runs forever, or until a crash is found, therefore it cannot be fully integrated into local continuous integration testing.
+The workflow in `.github/workflows/fuzz.yml` though runs a brief fuzzing on code changed in a PR,
+which can be used to provide early warning on code changes.
+
+## Reproducing crash failures
+
+If OSS-Fuzz (or the CI workflow) identifies a crash, it will produce a "minimized testcase" file
+(e.g. ).
+
+To reproduce this crash locally, the easiest way is to run the [tox](https://tox.wiki/) environment, provided in this repository, against the test file (see `tox.ini`):
+
+```
+tox -e fuzz path/to/testcase
+```
+
+This idempotently sets up a local python environment with markdown-it-py (local dev) and [Atheris](https://pypi.org/project/atheris/) installed,
+clones <https://github.com/google/oss-fuzz> into it,
+and builds the fuzzers.
+Then the testcase is run within this environment.
+
+If you wish to simply run the full fuzzing process,
+you can activate this environment, then run e.g.:
+
+```
+python .tox/fuzz/oss-fuzz/infra/helper.py run_fuzzer markdown-it-py fuzz_markdown
+```
+
+For a more thorough guide on reproducing, see: https://google.github.io/oss-fuzz/advanced-topics/reproducing/
diff --git a/tests/fuzz/fuzz_markdown.py b/tests/fuzz/fuzz_markdown.py
new file mode 100644
index 00000000..d78ef697
--- /dev/null
+++ b/tests/fuzz/fuzz_markdown.py
@@ -0,0 +1,30 @@
+import sys
+
+import atheris
+
+from markdown_it import MarkdownIt
+
+
+def TestOneInput(data):
+    """Parse and render one fuzzer-generated input with a default parser.
+
+    Nothing is caught here: any exception raised by ``parse`` or ``render``
+    propagates to Atheris, which reports it as a crash.
+    """
+    fdp = atheris.FuzzedDataProvider(data)
+    md = MarkdownIt()
+    # ``sys.maxsize`` as the count means "use all of the remaining input".
+    raw_markdown = fdp.ConsumeUnicodeNoSurrogates(sys.maxsize)
+    md.parse(raw_markdown)
+    md.render(raw_markdown)
+
+
+def main():
+    """Instrument the loaded code, then hand control to the Atheris fuzz loop."""
+    atheris.instrument_all()
+    atheris.Setup(sys.argv, TestOneInput)
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/fuzz/fuzz_markdown_extended.py b/tests/fuzz/fuzz_markdown_extended.py
new file mode 100644
index 00000000..4ba749ee
--- /dev/null
+++ b/tests/fuzz/fuzz_markdown_extended.py
@@ -0,0 +1,69 @@
+import sys
+
+import atheris
+
+# Beautified from auto-generated fuzzer at:
+# https://github.com/ossf/fuzz-introspector/pull/872#issuecomment-1450847118
+# Auto-fuzz heuristics used: py-autofuzz-heuristics-4.1
+# Imports by the generated code
+import markdown_it
+
+
+def TestOneInput(data):
+    """Call the public ``MarkdownIt`` methods with fuzzer-generated strings.
+
+    Every method call is guarded separately, so an expected rejection by one
+    of them (for example an unknown rule name passed to ``disable``) does not
+    stop the methods listed after it from being exercised. Exceptions other
+    than ``ValueError``, ``KeyError`` and ``TypeError`` propagate to Atheris,
+    which reports them as crashes.
+    """
+    fdp = atheris.FuzzedDataProvider(data)
+
+    def consume_text(max_length):
+        # Draw a length in [0, max_length], then at most that many characters.
+        return fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(0, max_length))
+
+    # Draw every argument up front, in a fixed order, so the mapping from
+    # input bytes to arguments does not depend on which calls raise.
+    val_1 = consume_text(1024)
+    val_2 = consume_text(1024)
+    val_3 = consume_text(256)
+    val_4 = consume_text(256)
+    val_5 = consume_text(256)
+    val_6 = consume_text(256)
+    val_7 = consume_text(256)
+    val_8 = consume_text(256)
+    val_9 = consume_text(256)
+    val_10 = consume_text(256)
+
+    c1 = markdown_it.main.MarkdownIt()
+    calls = (
+        (c1.render, val_1),
+        (c1.parse, val_2),
+        (c1.renderInline, val_3),
+        (c1.parseInline, val_4),
+        (c1.normalizeLink, val_5),
+        (c1.normalizeLinkText, val_6),
+        (c1.disable, val_7),
+        (c1.enable, val_8),
+        (c1.validateLink, val_9),
+        (c1.configure, val_10),
+    )
+    for api_call, argument in calls:
+        try:
+            api_call(argument)
+        except (ValueError, KeyError, TypeError):
+            # Exceptions thrown by the hit code.
+            pass
+
+
+def main():
+    """Instrument the loaded code, then hand control to the Atheris fuzz loop."""
+    atheris.instrument_all()
+    atheris.Setup(sys.argv, TestOneInput)
+    atheris.Fuzz()
+
+
+if __name__ == "__main__":
+    main()