diff --git a/doc/html5lib.filters.rst b/doc/html5lib.filters.rst
index 38d4a956..d70e4552 100644
--- a/doc/html5lib.filters.rst
+++ b/doc/html5lib.filters.rst
@@ -6,54 +6,53 @@ filters Package
.. automodule:: html5lib.filters.base
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`alphabeticalattributes` Module
------------------------------------
.. automodule:: html5lib.filters.alphabeticalattributes
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`inject_meta_charset` Module
---------------------------------
.. automodule:: html5lib.filters.inject_meta_charset
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`lint` Module
------------------
.. automodule:: html5lib.filters.lint
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`optionaltags` Module
--------------------------
.. automodule:: html5lib.filters.optionaltags
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`sanitizer` Module
-----------------------
.. automodule:: html5lib.filters.sanitizer
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`whitespace` Module
------------------------
.. automodule:: html5lib.filters.whitespace
:members:
- :undoc-members:
:show-inheritance:
-
+ :special-members: __init__
diff --git a/doc/html5lib.rst b/doc/html5lib.rst
index 2a0b150f..d7c75c58 100644
--- a/doc/html5lib.rst
+++ b/doc/html5lib.rst
@@ -9,7 +9,6 @@ html5lib Package
.. automodule:: html5lib.constants
:members:
- :undoc-members:
:show-inheritance:
:mod:`html5parser` Module
@@ -17,16 +16,16 @@ html5lib Package
.. automodule:: html5lib.html5parser
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`serializer` Module
------------------------
.. automodule:: html5lib.serializer
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
Subpackages
-----------
@@ -37,4 +36,3 @@ Subpackages
html5lib.treebuilders
html5lib.treewalkers
html5lib.treeadapters
-
diff --git a/doc/html5lib.treeadapters.rst b/doc/html5lib.treeadapters.rst
index 6b2dc78d..1d3a9fba 100644
--- a/doc/html5lib.treeadapters.rst
+++ b/doc/html5lib.treeadapters.rst
@@ -1,4 +1,4 @@
-treebuilders Package
+treeadapters Package
====================
:mod:`~html5lib.treeadapters` Package
@@ -6,15 +6,15 @@ treebuilders Package
.. automodule:: html5lib.treeadapters
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
.. automodule:: html5lib.treeadapters.genshi
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
.. automodule:: html5lib.treeadapters.sax
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
diff --git a/doc/html5lib.treebuilders.rst b/doc/html5lib.treebuilders.rst
index aee82142..1a051e50 100644
--- a/doc/html5lib.treebuilders.rst
+++ b/doc/html5lib.treebuilders.rst
@@ -6,38 +6,37 @@ treebuilders Package
.. automodule:: html5lib.treebuilders
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`base` Module
-------------------
.. automodule:: html5lib.treebuilders.base
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`dom` Module
-----------------
.. automodule:: html5lib.treebuilders.dom
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`etree` Module
-------------------
.. automodule:: html5lib.treebuilders.etree
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`etree_lxml` Module
------------------------
.. automodule:: html5lib.treebuilders.etree_lxml
:members:
- :undoc-members:
:show-inheritance:
-
+ :special-members: __init__
diff --git a/doc/html5lib.treewalkers.rst b/doc/html5lib.treewalkers.rst
index 085d8a98..4afef476 100644
--- a/doc/html5lib.treewalkers.rst
+++ b/doc/html5lib.treewalkers.rst
@@ -6,46 +6,45 @@ treewalkers Package
.. automodule:: html5lib.treewalkers
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`base` Module
------------------
.. automodule:: html5lib.treewalkers.base
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`dom` Module
-----------------
.. automodule:: html5lib.treewalkers.dom
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`etree` Module
-------------------
.. automodule:: html5lib.treewalkers.etree
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`etree_lxml` Module
------------------------
.. automodule:: html5lib.treewalkers.etree_lxml
:members:
- :undoc-members:
:show-inheritance:
-
+ :special-members: __init__
:mod:`genshi` Module
--------------------
.. automodule:: html5lib.treewalkers.genshi
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 75765924..9d39b9d4 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -25,13 +25,48 @@
def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
- """Parse a string or file-like object into a tree"""
+ """Parse an HTML document as a string or file-like object into a tree
+
+ :arg doc: the document to parse as a string or file-like object
+
+ :arg treebuilder: the treebuilder to use when parsing
+
+ :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+ :returns: parsed tree
+
+ Example:
+
+ >>> from html5lib.html5parser import parse
+ >>> parse('<html><body><p>This is a doc</p></body></html>')
+ <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>
+
+ """
tb = treebuilders.getTreeBuilder(treebuilder)
p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
return p.parse(doc, **kwargs)
def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
+ """Parse an HTML fragment as a string or file-like object into a tree
+
+ :arg doc: the fragment to parse as a string or file-like object
+
+ :arg container: the container context to parse the fragment in
+
+ :arg treebuilder: the treebuilder to use when parsing
+
+ :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+ :returns: parsed tree
+
+ Example:
+
+ >>> from html5lib.html5parser import parseFragment
+ >>> parseFragment('<b>this is a fragment</b>')
+ <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>
+
+ """
tb = treebuilders.getTreeBuilder(treebuilder)
p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
return p.parseFragment(doc, container=container, **kwargs)
@@ -50,16 +85,30 @@ def __new__(meta, classname, bases, classDict):
class HTMLParser(object):
- """HTML parser. Generates a tree structure from a stream of (possibly
- malformed) HTML"""
+ """HTML parser
+
+ Generates a tree structure from a stream of (possibly malformed) HTML.
+
+ """
def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
"""
- strict - raise an exception when a parse error is encountered
+ :arg tree: a treebuilder class controlling the type of tree that will be
+ returned. Built in treebuilders can be accessed through
+ html5lib.treebuilders.getTreeBuilder(treeType)
+
+ :arg strict: raise an exception when a parse error is encountered
+
+ :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+ :arg debug: whether or not to enable debug mode which logs things
+
+ Example:
+
+ >>> from html5lib import treebuilders
+ >>> from html5lib.html5parser import HTMLParser
+ >>> parser = HTMLParser() # generates parser with etree builder
+ >>> parser = HTMLParser(treebuilders.getTreeBuilder('lxml'), strict=True) # generates strict parser with lxml builder
- tree - a treebuilder class controlling the type of tree that will be
- returned. Built in treebuilders can be accessed through
- html5lib.treebuilders.getTreeBuilder(treeType)
"""
# Raise an exception on the first error encountered
@@ -123,9 +172,8 @@ def reset(self):
@property
def documentEncoding(self):
- """The name of the character encoding
- that was used to decode the input stream,
- or :obj:`None` if that is not determined yet.
+ """Name of the character encoding that was used to decode the input stream, or
+ :obj:`None` if that is not determined yet
"""
if not hasattr(self, 'tokenizer'):
@@ -219,14 +267,24 @@ def normalizedTokens(self):
def parse(self, stream, *args, **kwargs):
"""Parse a HTML document into a well-formed tree
- stream - a filelike object or string containing the HTML to be parsed
+ :arg stream: a file-like object or string containing the HTML to be parsed
+
+ The optional encoding parameter must be a string that indicates
+ the encoding. If specified, that encoding will be used,
+ regardless of any BOM or later declaration (such as in a meta
+ element).
+
+ :arg scripting: treat noscript elements as if JavaScript was turned on
- The optional encoding parameter must be a string that indicates
- the encoding. If specified, that encoding will be used,
- regardless of any BOM or later declaration (such as in a meta
- element)
+ :returns: parsed tree
+
+ Example:
+
+ >>> from html5lib.html5parser import HTMLParser
+ >>> parser = HTMLParser()
+ >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
+ <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>
- scripting - treat noscript elements as if javascript was turned on
"""
self._parse(stream, False, None, *args, **kwargs)
return self.tree.getDocument()
@@ -234,17 +292,27 @@ def parse(self, stream, *args, **kwargs):
def parseFragment(self, stream, *args, **kwargs):
"""Parse a HTML fragment into a well-formed tree fragment
- container - name of the element we're setting the innerHTML property
- if set to None, default to 'div'
+ :arg container: name of the element whose innerHTML property is being
+ set; if set to None, defaults to 'div'
+
+ :arg stream: a file-like object or string containing the HTML to be parsed
+
+ The optional encoding parameter must be a string that indicates
+ the encoding. If specified, that encoding will be used,
+ regardless of any BOM or later declaration (such as in a meta
+ element)
- stream - a filelike object or string containing the HTML to be parsed
+ :arg scripting: treat noscript elements as if JavaScript was turned on
- The optional encoding parameter must be a string that indicates
- the encoding. If specified, that encoding will be used,
- regardless of any BOM or later declaration (such as in a meta
- element)
+ :returns: parsed tree
+
+ Example:
+
+ >>> from html5lib.html5parser import HTMLParser
+ >>> parser = HTMLParser()
+ >>> parser.parseFragment('<b>this is a fragment</b>')
+ <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>
- scripting - treat noscript elements as if javascript was turned on
"""
self._parse(stream, True, *args, **kwargs)
return self.tree.getFragment()
@@ -258,8 +326,7 @@ def parseError(self, errorcode="XXX-undefined-error", datavars=None):
raise ParseError(E[errorcode] % datavars)
def normalizeToken(self, token):
- """ HTML5 specific normalizations to the token stream """
-
+ # HTML5 specific normalizations to the token stream
if token["type"] == tokenTypes["StartTag"]:
raw = token["data"]
token["data"] = OrderedDict(raw)
@@ -327,9 +394,7 @@ def resetInsertionMode(self):
self.phase = new_phase
def parseRCDataRawtext(self, token, contentType):
- """Generic RCDATA/RAWTEXT Parsing algorithm
- contentType - RCDATA or RAWTEXT
- """
+ # Generic RCDATA/RAWTEXT Parsing algorithm
assert contentType in ("RAWTEXT", "RCDATA")
self.tree.insertElement(token)
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
index 9e19a559..402b722e 100644
--- a/html5lib/treewalkers/__init__.py
+++ b/html5lib/treewalkers/__init__.py
@@ -13,7 +13,7 @@
from .. import constants
from .._utils import default_etree
-__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"]
+__all__ = ["getTreeWalker", "pprint"]
treeWalkerCache = {}