diff --git a/doc/html5lib.filters.rst b/doc/html5lib.filters.rst index 38d4a956..d70e4552 100644 --- a/doc/html5lib.filters.rst +++ b/doc/html5lib.filters.rst @@ -6,54 +6,53 @@ filters Package .. automodule:: html5lib.filters.base :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`alphabeticalattributes` Module ------------------------------------ .. automodule:: html5lib.filters.alphabeticalattributes :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`inject_meta_charset` Module --------------------------------- .. automodule:: html5lib.filters.inject_meta_charset :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`lint` Module ------------------ .. automodule:: html5lib.filters.lint :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`optionaltags` Module -------------------------- .. automodule:: html5lib.filters.optionaltags :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`sanitizer` Module ----------------------- .. automodule:: html5lib.filters.sanitizer :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`whitespace` Module ------------------------ .. automodule:: html5lib.filters.whitespace :members: - :undoc-members: :show-inheritance: - + :special-members: __init__ diff --git a/doc/html5lib.rst b/doc/html5lib.rst index 2a0b150f..d7c75c58 100644 --- a/doc/html5lib.rst +++ b/doc/html5lib.rst @@ -9,7 +9,6 @@ html5lib Package .. automodule:: html5lib.constants :members: - :undoc-members: :show-inheritance: :mod:`html5parser` Module @@ -17,16 +16,16 @@ html5lib Package .. automodule:: html5lib.html5parser :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`serializer` Module ------------------------ .. 
automodule:: html5lib.serializer :members: - :undoc-members: :show-inheritance: + :special-members: __init__ Subpackages ----------- @@ -37,4 +36,3 @@ Subpackages html5lib.treebuilders html5lib.treewalkers html5lib.treeadapters - diff --git a/doc/html5lib.treeadapters.rst b/doc/html5lib.treeadapters.rst index 6b2dc78d..1d3a9fba 100644 --- a/doc/html5lib.treeadapters.rst +++ b/doc/html5lib.treeadapters.rst @@ -1,4 +1,4 @@ -treebuilders Package +treeadapters Package ==================== :mod:`~html5lib.treeadapters` Package @@ -6,15 +6,15 @@ treebuilders Package .. automodule:: html5lib.treeadapters :members: - :undoc-members: :show-inheritance: + :special-members: __init__ .. automodule:: html5lib.treeadapters.genshi :members: - :undoc-members: :show-inheritance: + :special-members: __init__ .. automodule:: html5lib.treeadapters.sax :members: - :undoc-members: :show-inheritance: + :special-members: __init__ diff --git a/doc/html5lib.treebuilders.rst b/doc/html5lib.treebuilders.rst index aee82142..1a051e50 100644 --- a/doc/html5lib.treebuilders.rst +++ b/doc/html5lib.treebuilders.rst @@ -6,38 +6,37 @@ treebuilders Package .. automodule:: html5lib.treebuilders :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`base` Module ------------------- .. automodule:: html5lib.treebuilders.base :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`dom` Module ----------------- .. automodule:: html5lib.treebuilders.dom :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`etree` Module ------------------- .. automodule:: html5lib.treebuilders.etree :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`etree_lxml` Module ------------------------ .. 
automodule:: html5lib.treebuilders.etree_lxml :members: - :undoc-members: :show-inheritance: - + :special-members: __init__ diff --git a/doc/html5lib.treewalkers.rst b/doc/html5lib.treewalkers.rst index 085d8a98..4afef476 100644 --- a/doc/html5lib.treewalkers.rst +++ b/doc/html5lib.treewalkers.rst @@ -6,46 +6,45 @@ treewalkers Package .. automodule:: html5lib.treewalkers :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`base` Module ------------------ .. automodule:: html5lib.treewalkers.base :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`dom` Module ----------------- .. automodule:: html5lib.treewalkers.dom :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`etree` Module ------------------- .. automodule:: html5lib.treewalkers.etree :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`etree_lxml` Module ------------------------ .. automodule:: html5lib.treewalkers.etree_lxml :members: - :undoc-members: :show-inheritance: - + :special-members: __init__ :mod:`genshi` Module -------------------- .. automodule:: html5lib.treewalkers.genshi :members: - :undoc-members: :show-inheritance: + :special-members: __init__ diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 75765924..9d39b9d4 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -25,13 +25,48 @@ def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs): - """Parse a string or file-like object into a tree""" + """Parse an HTML document as a string or file-like object into a tree + + :arg doc: the document to parse as a string or file-like object + + :arg treebuilder: the treebuilder to use when parsing + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import parse + >>> parse('

This is a doc

') + + + """ tb = treebuilders.getTreeBuilder(treebuilder) p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) return p.parse(doc, **kwargs) def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs): + """Parse an HTML fragment as a string or file-like object into a tree + + :arg doc: the fragment to parse as a string or file-like object + + :arg container: the container context to parse the fragment in + + :arg treebuilder: the treebuilder to use when parsing + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import parseFragment + >>> parseFragment('this is a fragment') + + + """ tb = treebuilders.getTreeBuilder(treebuilder) p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) return p.parseFragment(doc, container=container, **kwargs) @@ -50,16 +85,30 @@ def __new__(meta, classname, bases, classDict): class HTMLParser(object): - """HTML parser. Generates a tree structure from a stream of (possibly - malformed) HTML""" + """HTML parser + + Generates a tree structure from a stream of (possibly malformed) HTML. + + """ def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False): """ - strict - raise an exception when a parse error is encountered + :arg tree: a treebuilder class controlling the type of tree that will be + returned. 
Built in treebuilders can be accessed through + html5lib.treebuilders.getTreeBuilder(treeType) + + :arg strict: raise an exception when a parse error is encountered + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :arg debug: whether or not to enable debug mode which logs things + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() # generates parser with etree builder + >>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict - tree - a treebuilder class controlling the type of tree that will be - returned. Built in treebuilders can be accessed through - html5lib.treebuilders.getTreeBuilder(treeType) """ # Raise an exception on the first error encountered @@ -123,9 +172,8 @@ def reset(self): @property def documentEncoding(self): - """The name of the character encoding - that was used to decode the input stream, - or :obj:`None` if that is not determined yet. + """Name of the character encoding that was used to decode the input stream, or + :obj:`None` if that is not determined yet """ if not hasattr(self, 'tokenizer'): @@ -219,14 +267,24 @@ def normalizedTokens(self): def parse(self, stream, *args, **kwargs): """Parse a HTML document into a well-formed tree - stream - a filelike object or string containing the HTML to be parsed + :arg stream: a file-like object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element). + + :arg scripting: treat noscript elements as if JavaScript was turned on - The optional encoding parameter must be a string that indicates - the encoding. 
If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() + >>> parser.parse('

This is a doc

') + - scripting - treat noscript elements as if javascript was turned on """ self._parse(stream, False, None, *args, **kwargs) return self.tree.getDocument() @@ -234,17 +292,27 @@ def parse(self, stream, *args, **kwargs): def parseFragment(self, stream, *args, **kwargs): """Parse a HTML fragment into a well-formed tree fragment - container - name of the element we're setting the innerHTML property - if set to None, default to 'div' + :arg container: name of the element we're setting the innerHTML + property; if set to None, defaults to 'div' + + :arg stream: a file-like object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element). - stream - a filelike object or string containing the HTML to be parsed + :arg scripting: treat noscript elements as if JavaScript was turned on - The optional encoding parameter must be a string that indicates - the encoding. 
If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() + >>> parser.parseFragment('this is a fragment') + - scripting - treat noscript elements as if javascript was turned on """ self._parse(stream, True, *args, **kwargs) return self.tree.getFragment() @@ -258,8 +326,7 @@ def parseError(self, errorcode="XXX-undefined-error", datavars=None): raise ParseError(E[errorcode] % datavars) def normalizeToken(self, token): - """ HTML5 specific normalizations to the token stream """ - + # HTML5 specific normalizations to the token stream if token["type"] == tokenTypes["StartTag"]: raw = token["data"] token["data"] = OrderedDict(raw) @@ -327,9 +394,7 @@ def resetInsertionMode(self): self.phase = new_phase def parseRCDataRawtext(self, token, contentType): - """Generic RCDATA/RAWTEXT Parsing algorithm - contentType - RCDATA or RAWTEXT - """ + # Generic RCDATA/RAWTEXT Parsing algorithm assert contentType in ("RAWTEXT", "RCDATA") self.tree.insertElement(token) diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py index 9e19a559..402b722e 100644 --- a/html5lib/treewalkers/__init__.py +++ b/html5lib/treewalkers/__init__.py @@ -13,7 +13,7 @@ from .. import constants from .._utils import default_etree -__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"] +__all__ = ["getTreeWalker", "pprint"] treeWalkerCache = {}