
Generate json artifacts #43

Merged
merged 20 commits on Jun 14, 2018
Changes from 12 commits
65 changes: 64 additions & 1 deletion readthedocs_ext/readthedocs.py
@@ -3,26 +3,58 @@
from __future__ import absolute_import

import codecs
import json
import os
import re
import types
from distutils.version import LooseVersion

import sphinx
from sphinx import package_dir
from sphinx.builders.html import StandaloneHTMLBuilder, DirectoryHTMLBuilder, SingleFileHTMLBuilder
from sphinx.builders.html import (DirectoryHTMLBuilder, SingleFileHTMLBuilder,
                                  StandaloneHTMLBuilder)
from sphinx.util.console import bold

from .embed import EmbedDirective
from .mixins import BuilderMixin

try:
    # Available from Sphinx 1.6
    from sphinx.util.logging import getLogger
Member Author

Sphinx 1.4 has the logging module, but not getLogger

except ImportError:
    from logging import getLogger

log = getLogger(__name__)

MEDIA_MAPPING = {
    "_static/jquery.js": "%sjavascript/jquery/jquery-2.0.3.min.js",
    "_static/underscore.js": "%sjavascript/underscore.js",
    "_static/doctools.js": "%sjavascript/doctools.js",
}


# Whitelist keys that we want to output
# to the json artifacts.
KEYS = [
    'body',
    'alabaster_version',
    'display_toc',
    'title',
    'sourcename',
    'customsidebar',
    'metatags',
    'current_page_name',
    'next',
    'rellinks',
    'meta',
    'parents',
    'sidebars',
    'toc',
    'prev',
    'page_source_suffix',
]
Member Author

Looking into the code https://github.com/rtfd/readthedocs.org/blob/04ce7bb7a65a5d9ce2ad5a7b9a34185ef45c98ce/readthedocs/search/parse_json.py#L92-L124, we don't need all these keys. We only need:

  • current_page_name
  • body
  • title
  • toc

Should I remove the rest? Are they used in other parts of the code that I'm missing?

Member

They are the only ones that we use currently. If there is other useful data, we should keep it, but I believe that's the primary data that we care about.
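For illustration only (not part of this diff): a minimal sketch of what a page's .fjson artifact would contain if the whitelist were trimmed to just those four keys. The page values here are hypothetical.

import json

# Hypothetical page context, filtered to the keys the search code consumes.
REDUCED_KEYS = ['current_page_name', 'body', 'title', 'toc']

context = {
    'current_page_name': 'index',
    'title': 'Welcome',
    'body': '<div class="section">...</div>',
    'toc': '<ul><li><a href="#welcome">Welcome</a></li></ul>',
    'next': {'link': 'install.html', 'title': 'Installation'},  # would be dropped
}

artifact = {key: context[key] for key in REDUCED_KEYS if key in context}
print(json.dumps(artifact, indent=4))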



def finalize_media(app):
    """ Point media files at our media server. """

@@ -126,6 +158,36 @@ def rtd_render(self, template, render_context):
app.builder.templates)


def generate_json_artifacts(app, pagename, templatename, context, doctree):
    """
    Generate JSON artifacts for each page.

    This way we can skip generating this in another build step.
    """
    try:
        # We need to get the output directory where the docs are built
        # _build/html/_json.
        build_json = os.path.abspath(
            os.path.join(app.outdir, '_json')
        )
Member

I wonder if we should output this to _build/json as we normally do? Both would work, but it might reduce the downstream changes. For some operational reasons, we might want to keep the JSON & HTML separated on the web machines, and keeping them out of the same path might make things easier.
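For illustration only (not part of this diff): a tiny helper sketching the two layouts being weighed here. The function name and example path are hypothetical; outdir stands in for Sphinx's app.outdir (the HTML output directory).

import os

def json_output_dirs(outdir):
    # Layout in this PR: JSON nested under the HTML output, e.g. _build/html/_json.
    nested = os.path.abspath(os.path.join(outdir, '_json'))
    # Alternative raised here: a sibling directory, e.g. _build/json,
    # which keeps the JSON and HTML artifacts separated on disk.
    sibling = os.path.abspath(os.path.join(outdir, '..', 'json'))
    return nested, sibling

print(json_output_dirs('/project/docs/_build/html'))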

        outjson = os.path.join(build_json, pagename + '.fjson')
        outdir = os.path.dirname(outjson)
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        with open(outjson, 'w+') as json_file:
            to_context = {
Member

I think the field should be blank rather than nonexistent if the key is missing. It should be something like:

to_context = {key: context.get(key, '') for key in KEYS}

Member

This makes sense, so that the JSON always has the same keys.

                key: context[key]
                for key in KEYS
                if key in context
            }
            json_file.write(json.dumps(to_context, indent=4))
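For illustration only (not part of this diff): a standalone comparison of the two comprehensions discussed above, using a hypothetical context where 'toc' is missing.

KEYS = ['title', 'body', 'toc']
context = {'title': 'Index', 'body': '<p>...</p>'}

# As written in this PR: missing keys are simply absent from the artifact.
only_present = {key: context[key] for key in KEYS if key in context}
# -> {'title': 'Index', 'body': '<p>...</p>'}

# Reviewer's suggestion: default missing keys to '' so every artifact has the same keys.
always_all = {key: context.get(key, '') for key in KEYS}
# -> {'title': 'Index', 'body': '<p>...</p>', 'toc': ''}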
Member

What about using json.dump instead of dumps?
json.dump(to_context, json_file, indent=4)

Member Author

Yeah, that's shorter

        log.info('{page} processed.'.format(page=outjson))
Member Author

Maybe we want to change/remove this logging?

    except Exception:
Member

We should probably use a couple of different try/excepts here that are more scoped. I see a couple of places where things could fail:

  • JSON dumping
  • Filesystem issues

We should probably catch each of them specifically if possible, though having a wider try/except might also be useful to stop builds from failing.

Member Author

Got it
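For illustration only (not part of this diff): a rough sketch of the more scoped error handling suggested above, separating filesystem errors from serialization errors while still keeping the build from failing. The helper name is hypothetical.

import json
import os

def dump_page_json(outjson, to_context, log):
    outdir = os.path.dirname(outjson)
    try:
        if not os.path.exists(outdir):
            os.makedirs(outdir)
    except OSError:
        log.exception('Could not create output directory for %s', outjson)
        return
    try:
        payload = json.dumps(to_context, indent=4)
    except (TypeError, ValueError):
        log.exception('Could not serialize JSON for %s', outjson)
        return
    try:
        with open(outjson, 'w+') as json_file:
            json_file.write(payload)
    except IOError:
        log.exception('Failure in JSON search dump for %s', outjson)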

        log.exception(
Member Author

This doesn't make the build fail (we'd need to re-raise the exception). Should it fail?

Member

I think we can keep it from failing for now, and just log it.

            'Failure in JSON search dump for {page}'.format(page=outjson)
        )


class HtmlBuilderMixin(BuilderMixin):

    static_readthedocs_files = [
Expand Down Expand Up @@ -213,6 +275,7 @@ def setup(app):
    app.add_builder(ReadtheDocsSingleFileHTMLBuilderLocalMedia)
    app.connect('builder-inited', finalize_media)
    app.connect('html-page-context', update_body)
    app.connect('html-page-context', generate_json_artifacts)

    # Embed
    app.add_directive('readthedocs-embed', EmbedDirective)
14 changes: 13 additions & 1 deletion tests/test_integration.py
@@ -9,7 +9,12 @@ class LanguageIntegrationTests(unittest.TestCase):

    def _run_test(self, test_dir, test_file, test_string, builder='html'):
        with build_output(test_dir, test_file, builder) as data:
            self.assertIn(test_string, data)
Member Author

I replaced this to make it easier to test the output :)

            if not isinstance(test_string, list):
                test_strings = [test_string]
            else:
                test_strings = test_string
            for string in test_strings:
                self.assertIn(string, data)


class IntegrationTests(LanguageIntegrationTests):
@@ -56,3 +61,10 @@ def test_searchtools_is_patched(self):
            HtmlBuilderMixin.REPLACEMENT_PATTERN
        )
        self.assertIn(HtmlBuilderMixin.REPLACEMENT_TEXT, data)

    def test_generate_json_artifacts(self):
        self._run_test(
            'pyexample',
            '_build/html/_json/index.fjson',
            ['current_page_name', 'title', 'body', 'toc'],
        )
5 changes: 3 additions & 2 deletions tox.ini
@@ -1,6 +1,6 @@
[tox]
envlist =
    py{27,35,36}-sphinx{13,14,15,16}
    py{27,35,36}-sphinx{13,14,15,16,17}
    lint

[testenv]
@@ -13,7 +13,8 @@ deps =
    sphinx13: Sphinx<1.4
    sphinx14: Sphinx<1.5
    sphinx15: Sphinx<1.6
    sphinx16: Sphinx==1.6b3
    sphinx16: Sphinx<1.7
    sphinx17: Sphinx==1.7.4
commands =
    py.test {posargs}
