Skip to content

Commit 0f1994b

Browse files
willkg authored and gsnedders committed
Document html5lib.treewalkers (#386)
1 parent 4ed8b8b commit 0f1994b

File tree

2 files changed

+128
-15
lines changed

2 files changed

+128
-15
lines changed

html5lib/treewalkers/__init__.py

+26-15
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,25 @@
2121
def getTreeWalker(treeType, implementation=None, **kwargs):
2222
"""Get a TreeWalker class for various types of tree with built-in support
2323
24-
Args:
25-
treeType (str): the name of the tree type required (case-insensitive).
26-
Supported values are:
27-
28-
- "dom": The xml.dom.minidom DOM implementation
29-
- "etree": A generic walker for tree implementations exposing an
30-
elementtree-like interface (known to work with
31-
ElementTree, cElementTree and lxml.etree).
32-
- "lxml": Optimized walker for lxml.etree
33-
- "genshi": a Genshi stream
34-
35-
Implementation: A module implementing the tree type e.g.
36-
xml.etree.ElementTree or cElementTree (Currently applies to the
37-
"etree" tree type only).
24+
:arg str treeType: the name of the tree type required (case-insensitive).
25+
Supported values are:
26+
27+
* "dom": The xml.dom.minidom DOM implementation
28+
* "etree": A generic walker for tree implementations exposing an
29+
elementtree-like interface (known to work with ElementTree,
30+
cElementTree and lxml.etree).
31+
* "lxml": Optimized walker for lxml.etree
32+
* "genshi": a Genshi stream
33+
34+
:arg implementation: A module implementing the tree type e.g.
35+
xml.etree.ElementTree or cElementTree (Currently applies to the "etree"
36+
tree type only).
37+
38+
:arg kwargs: keyword arguments passed to the etree walker--for other
39+
walkers, this has no effect
40+
41+
:returns: a TreeWalker class
42+
3843
"""
3944

4045
treeType = treeType.lower()
@@ -73,7 +78,13 @@ def concatenateCharacterTokens(tokens):
7378

7479

7580
def pprint(walker):
76-
"""Pretty printer for tree walkers"""
81+
"""Pretty printer for tree walkers
82+
83+
Takes a TreeWalker instance and pretty prints the output of walking the tree.
84+
85+
:arg walker: a TreeWalker instance
86+
87+
"""
7788
output = []
7889
indent = 0
7990
for token in concatenateCharacterTokens(walker):

html5lib/treewalkers/base.py

+102
Original file line numberDiff line numberDiff line change
@@ -18,34 +18,110 @@
1818

1919

2020
class TreeWalker(object):
21+
"""Walks a tree yielding tokens
22+
23+
Tokens are dicts that all have a ``type`` field specifying the type of the
24+
token.
25+
26+
"""
2127
def __init__(self, tree):
28+
"""Creates a TreeWalker
29+
30+
:arg tree: the tree to walk
31+
32+
"""
2233
self.tree = tree
2334

2435
def __iter__(self):
2536
raise NotImplementedError
2637

2738
def error(self, msg):
39+
"""Generates an error token with the given message
40+
41+
:arg msg: the error message
42+
43+
:returns: SerializeError token
44+
45+
"""
2846
return {"type": "SerializeError", "data": msg}
2947

3048
def emptyTag(self, namespace, name, attrs, hasChildren=False):
49+
"""Generates an EmptyTag token
50+
51+
:arg namespace: the namespace of the token--can be ``None``
52+
53+
:arg name: the name of the element
54+
55+
:arg attrs: the attributes of the element as a dict
56+
57+
:arg hasChildren: whether or not to yield a SerializeError token because
58+
this tag shouldn't have children
59+
60+
:returns: EmptyTag token
61+
62+
"""
3163
yield {"type": "EmptyTag", "name": name,
3264
"namespace": namespace,
3365
"data": attrs}
3466
if hasChildren:
3567
yield self.error("Void element has children")
3668

3769
def startTag(self, namespace, name, attrs):
70+
"""Generates a StartTag token
71+
72+
:arg namespace: the namespace of the token--can be ``None``
73+
74+
:arg name: the name of the element
75+
76+
:arg attrs: the attributes of the element as a dict
77+
78+
:returns: StartTag token
79+
80+
"""
3881
return {"type": "StartTag",
3982
"name": name,
4083
"namespace": namespace,
4184
"data": attrs}
4285

4386
def endTag(self, namespace, name):
87+
"""Generates an EndTag token
88+
89+
:arg namespace: the namespace of the token--can be ``None``
90+
91+
:arg name: the name of the element
92+
93+
:returns: EndTag token
94+
95+
"""
4496
return {"type": "EndTag",
4597
"name": name,
4698
"namespace": namespace}
4799

48100
def text(self, data):
101+
"""Generates SpaceCharacters and Characters tokens
102+
103+
Depending on what's in the data, this generates one or more
104+
``SpaceCharacters`` and ``Characters`` tokens.
105+
106+
For example:
107+
108+
>>> from html5lib.treewalkers.base import TreeWalker
109+
>>> # Give it an empty tree just so it instantiates
110+
>>> walker = TreeWalker([])
111+
>>> list(walker.text(''))
112+
[]
113+
>>> list(walker.text(' '))
114+
[{u'data': ' ', u'type': u'SpaceCharacters'}]
115+
>>> list(walker.text(' abc ')) # doctest: +NORMALIZE_WHITESPACE
116+
[{u'data': ' ', u'type': u'SpaceCharacters'},
117+
{u'data': u'abc', u'type': u'Characters'},
118+
{u'data': u' ', u'type': u'SpaceCharacters'}]
119+
120+
:arg data: the text data
121+
122+
:returns: one or more ``SpaceCharacters`` and ``Characters`` tokens
123+
124+
"""
49125
data = data
50126
middle = data.lstrip(spaceCharacters)
51127
left = data[:len(data) - len(middle)]
@@ -60,18 +136,44 @@ def text(self, data):
60136
yield {"type": "SpaceCharacters", "data": right}
61137

62138
def comment(self, data):
139+
"""Generates a Comment token
140+
141+
:arg data: the comment
142+
143+
:returns: Comment token
144+
145+
"""
63146
return {"type": "Comment", "data": data}
64147

65148
def doctype(self, name, publicId=None, systemId=None):
149+
"""Generates a Doctype token
150+
151+
:arg name:
152+
153+
:arg publicId:
154+
155+
:arg systemId:
156+
157+
:returns: the Doctype token
158+
159+
"""
66160
return {"type": "Doctype",
67161
"name": name,
68162
"publicId": publicId,
69163
"systemId": systemId}
70164

71165
def entity(self, name):
166+
"""Generates an Entity token
167+
168+
:arg name: the entity name
169+
170+
:returns: an Entity token
171+
172+
"""
72173
return {"type": "Entity", "name": name}
73174

74175
def unknown(self, nodeType):
176+
"""Handles unknown node types"""
75177
return self.error("Unknown node type: " + nodeType)
76178

77179

0 commit comments

Comments
 (0)