diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py
index e2328847..d44447ea 100644
--- a/html5lib/treebuilders/__init__.py
+++ b/html5lib/treebuilders/__init__.py
@@ -1,29 +1,32 @@
-"""A collection of modules for building different kinds of tree from
-HTML documents.
+"""A collection of modules for building different kinds of trees from HTML
+documents.
To create a treebuilder for a new type of tree, you need to do
implement several things:
-1) A set of classes for various types of elements: Document, Doctype,
-Comment, Element. These must implement the interface of
-_base.treebuilders.Node (although comment nodes have a different
-signature for their constructor, see treebuilders.etree.Comment)
-Textual content may also be implemented as another node type, or not, as
-your tree implementation requires.
-
-2) A treebuilder object (called TreeBuilder by convention) that
-inherits from treebuilders._base.TreeBuilder. This has 4 required attributes:
-documentClass - the class to use for the bottommost node of a document
-elementClass - the class to use for HTML Elements
-commentClass - the class to use for comments
-doctypeClass - the class to use for doctypes
-It also has one required method:
-getDocument - Returns the root node of the complete document tree
-
-3) If you wish to run the unit tests, you must also create a
-testSerializer method on your treebuilder which accepts a node and
-returns a string containing Node and its children serialized according
-to the format used in the unittests
+1. A set of classes for various types of elements: Document, Doctype, Comment,
+ Element. These must implement the interface of ``treebuilders.base.Node``
+ (although comment nodes have a different signature for their constructor,
+ see ``treebuilders.etree.Comment``). Textual content may also be implemented
+ as another node type, or not, as your tree implementation requires.
+
+2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits
+ from ``treebuilders.base.TreeBuilder``. This has 4 required attributes:
+
+ * ``documentClass`` - the class to use for the bottommost node of a document
+ * ``elementClass`` - the class to use for HTML Elements
+ * ``commentClass`` - the class to use for comments
+ * ``doctypeClass`` - the class to use for doctypes
+
+ It also has one required method:
+
+ * ``getDocument`` - Returns the root node of the complete document tree
+
+3. If you wish to run the unit tests, you must also create a ``testSerializer``
+ method on your treebuilder which accepts a node and returns a string
+ containing Node and its children serialized according to the format used in
+ the unit tests.
+
"""
from __future__ import absolute_import, division, unicode_literals
@@ -34,23 +37,32 @@
def getTreeBuilder(treeType, implementation=None, **kwargs):
- """Get a TreeBuilder class for various types of tree with built-in support
-
- treeType - the name of the tree type required (case-insensitive). Supported
- values are:
-
- "dom" - A generic builder for DOM implementations, defaulting to
- a xml.dom.minidom based implementation.
- "etree" - A generic builder for tree implementations exposing an
- ElementTree-like interface, defaulting to
- xml.etree.cElementTree if available and
- xml.etree.ElementTree if not.
- "lxml" - A etree-based builder for lxml.etree, handling
- limitations of lxml's implementation.
-
- implementation - (Currently applies to the "etree" and "dom" tree types). A
- module implementing the tree type e.g.
- xml.etree.ElementTree or xml.etree.cElementTree."""
+ """Get a TreeBuilder class for various types of trees with built-in support
+
+ :arg treeType: the name of the tree type required (case-insensitive). Supported
+ values are:
+
+ * "dom" - A generic builder for DOM implementations, defaulting to an
+ xml.dom.minidom based implementation.
+ * "etree" - A generic builder for tree implementations exposing an
+ ElementTree-like interface, defaulting to xml.etree.cElementTree if
+ available and xml.etree.ElementTree if not.
+ * "lxml" - An etree-based builder for lxml.etree, handling limitations
+ of lxml's implementation.
+
+ :arg implementation: (Currently applies to the "etree" and "dom" tree
+ types). A module implementing the tree type e.g. xml.etree.ElementTree
+ or xml.etree.cElementTree.
+
+ :arg kwargs: Any additional options to pass to the TreeBuilder when
+ creating it.
+
+ Example:
+
+ >>> from html5lib.treebuilders import getTreeBuilder
+ >>> builder = getTreeBuilder('etree')
+
+ """
treeType = treeType.lower()
if treeType not in treeBuilderCache:
diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py
index a4b2792a..05d97ecc 100644
--- a/html5lib/treebuilders/base.py
+++ b/html5lib/treebuilders/base.py
@@ -21,22 +21,25 @@
class Node(object):
+ """Represents an item in the tree"""
def __init__(self, name):
- """Node representing an item in the tree.
- name - The tag name associated with the node
- parent - The parent of the current node (or None for the document node)
- value - The value of the current node (applies to text nodes and
- comments
- attributes - a dict holding name, value pairs for attributes of the node
- childNodes - a list of child nodes of the current node. This must
- include all elements but not necessarily other node types
- _flags - A list of miscellaneous flags that can be set on the node
+ """Creates a Node
+
+ :arg name: The tag name associated with the node
+
"""
+ # The tag name associated with the node
self.name = name
+ # The parent of the current node (or None for the document node)
self.parent = None
+ # The value of the current node (applies to text nodes and comments)
self.value = None
+ # A dict holding name -> value pairs for attributes of the node
self.attributes = {}
+ # A list of child nodes of the current node. This must include all
+ # elements but not necessarily other node types.
self.childNodes = []
+ # A list of miscellaneous flags that can be set on the node.
self._flags = []
def __str__(self):
@@ -53,23 +56,41 @@ def __repr__(self):
def appendChild(self, node):
"""Insert node as a child of the current node
+
+ :arg node: the node to insert
+
"""
raise NotImplementedError
def insertText(self, data, insertBefore=None):
"""Insert data as text in the current node, positioned before the
start of node insertBefore or to the end of the node's text.
+
+ :arg data: the data to insert
+
+ :arg insertBefore: the node to insert the text before; if None, the
+ text is added to the end of the current node's text
+
"""
raise NotImplementedError
def insertBefore(self, node, refNode):
"""Insert node as a child of the current node, before refNode in the
list of child nodes. Raises ValueError if refNode is not a child of
- the current node"""
+ the current node
+
+ :arg node: the node to insert
+
+ :arg refNode: the child node to insert the node before
+
+ """
raise NotImplementedError
def removeChild(self, node):
"""Remove node from the children of the current node
+
+ :arg node: the child node to remove
+
"""
raise NotImplementedError
@@ -77,6 +98,9 @@ def reparentChildren(self, newParent):
"""Move all the children of the current node to newParent.
This is needed so that trees that don't store text as nodes move the
text in the correct way
+
+ :arg newParent: the node to move all this node's children to
+
"""
# XXX - should this method be made more general?
for child in self.childNodes:
@@ -121,10 +145,12 @@ def nodesEqual(self, node1, node2):
class TreeBuilder(object):
"""Base treebuilder implementation
- documentClass - the class to use for the bottommost node of a document
- elementClass - the class to use for HTML Elements
- commentClass - the class to use for comments
- doctypeClass - the class to use for doctypes
+
+ * documentClass - the class to use for the bottommost node of a document
+ * elementClass - the class to use for HTML Elements
+ * commentClass - the class to use for comments
+ * doctypeClass - the class to use for doctypes
+
"""
# pylint:disable=not-callable
@@ -144,6 +170,11 @@ class TreeBuilder(object):
fragmentClass = None
def __init__(self, namespaceHTMLElements):
+ """Create a TreeBuilder
+
+ :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+ """
if namespaceHTMLElements:
self.defaultNamespace = "http://www.w3.org/1999/xhtml"
else:
@@ -367,11 +398,11 @@ def generateImpliedEndTags(self, exclude=None):
self.generateImpliedEndTags(exclude)
def getDocument(self):
- "Return the final tree"
+ """Return the final tree"""
return self.document
def getFragment(self):
- "Return the final fragment"
+ """Return the final fragment"""
# assert self.innerHTML
fragment = self.fragmentClass()
self.openElements[0].reparentChildren(fragment)
@@ -379,5 +410,8 @@ def getFragment(self):
def testSerializer(self, node):
"""Serialize the subtree of node in the format required by unit tests
- node - the node from which to start serializing"""
+
+ :arg node: the node from which to start serializing
+
+ """
raise NotImplementedError
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 908820c0..ca12a99c 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -309,7 +309,6 @@ def insertCommentMain(self, data, parent=None):
super(TreeBuilder, self).insertComment(data, parent)
def insertRoot(self, token):
- """Create the document root"""
# Because of the way libxml2 works, it doesn't seem to be possible to
# alter information like the doctype after the tree has been parsed.
# Therefore we need to use the built-in parser to create our initial