18
18
19
19
20
20
class TreeWalker (object ):
21
+ """Walks a tree yielding tokens
22
+
23
+ Tokens are dicts that all have a ``type`` field specifying the type of the
24
+ token.
25
+
26
+ """
21
27
def __init__ (self , tree ):
28
+ """Creates a TreeWalker
29
+
30
+ :arg tree: the tree to walk
31
+
32
+ """
22
33
self .tree = tree
23
34
24
35
def __iter__ (self ):
25
36
raise NotImplementedError
26
37
27
38
def error(self, msg):
    """Builds a SerializeError token carrying ``msg``

    :arg msg: the error message, stored under the token's ``data`` key

    :returns: dict with ``type`` set to ``"SerializeError"``

    """
    token = {"type": "SerializeError"}
    token["data"] = msg
    return token
29
47
30
48
def emptyTag(self, namespace, name, attrs, hasChildren=False):
    """Yields an EmptyTag token, plus an error token if it has children

    This is a generator: it always yields one ``EmptyTag`` token and, when
    ``hasChildren`` is true, follows it with a ``SerializeError`` token
    obtained from ``self.error``.

    :arg namespace: the namespace of the token--can be ``None``

    :arg name: the name of the element

    :arg attrs: the attributes of the element, stored under ``data``

    :arg hasChildren: when true, a ``SerializeError`` token with the
        message "Void element has children" is yielded after the tag

    :returns: generator of token dicts

    """
    tag = {
        "type": "EmptyTag",
        "name": name,
        "namespace": namespace,
        "data": attrs,
    }
    yield tag

    if not hasChildren:
        return
    yield self.error("Void element has children")
36
68
37
69
def startTag(self, namespace, name, attrs):
    """Builds a StartTag token

    :arg namespace: the namespace of the token--can be ``None``

    :arg name: the name of the element

    :arg attrs: the attributes of the element, stored under ``data``

    :returns: dict with ``type`` set to ``"StartTag"``

    """
    token = {"type": "StartTag"}
    token["name"] = name
    token["namespace"] = namespace
    token["data"] = attrs
    return token
42
85
43
86
def endTag(self, namespace, name):
    """Builds an EndTag token

    :arg namespace: the namespace of the token--can be ``None``

    :arg name: the name of the element

    :returns: dict with ``type`` set to ``"EndTag"``

    """
    token = {"type": "EndTag"}
    token["name"] = name
    token["namespace"] = namespace
    return token
47
99
48
100
def text (self , data ):
101
+ """Generates SpaceCharacters and Characters tokens
102
+
103
+ Depending on what's in the data, this generates one or more
104
+ ``SpaceCharacters`` and ``Characters`` tokens.
105
+
106
+ For example:
107
+
108
+ >>> from html5lib.treewalkers.base import TreeWalker
109
+ >>> # Give it an empty tree just so it instantiates
110
+ >>> walker = TreeWalker([])
111
+ >>> list(walker.text(''))
112
+ []
113
+ >>> list(walker.text(' '))
114
+ [{u'data': ' ', u'type': u'SpaceCharacters'}]
115
+ >>> list(walker.text(' abc ')) # doctest: +NORMALIZE_WHITESPACE
116
+ [{u'data': ' ', u'type': u'SpaceCharacters'},
117
+ {u'data': u'abc', u'type': u'Characters'},
118
+ {u'data': u' ', u'type': u'SpaceCharacters'}]
119
+
120
+ :arg data: the text data
121
+
122
+ :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens
123
+
124
+ """
49
125
data = data
50
126
middle = data .lstrip (spaceCharacters )
51
127
left = data [:len (data ) - len (middle )]
@@ -60,18 +136,44 @@ def text(self, data):
60
136
yield {"type" : "SpaceCharacters" , "data" : right }
61
137
62
138
def comment(self, data):
    """Builds a Comment token

    :arg data: the comment text, stored under the token's ``data`` key

    :returns: dict with ``type`` set to ``"Comment"``

    """
    token = {"type": "Comment"}
    token["data"] = data
    return token
64
147
65
148
def doctype(self, name, publicId=None, systemId=None):
    """Builds a Doctype token

    :arg name: the doctype name, stored under ``name``

    :arg publicId: value stored under ``publicId``--defaults to ``None``

    :arg systemId: value stored under ``systemId``--defaults to ``None``

    :returns: dict with ``type`` set to ``"Doctype"``

    """
    token = {"type": "Doctype"}
    token["name"] = name
    token["publicId"] = publicId
    token["systemId"] = systemId
    return token
70
164
71
165
def entity(self, name):
    """Builds an Entity token

    :arg name: the entity name, stored under the token's ``name`` key

    :returns: dict with ``type`` set to ``"Entity"``

    """
    token = {"type": "Entity"}
    token["name"] = name
    return token
73
174
74
175
def unknown(self, nodeType):
    """Handles unknown node types

    Produces an error token whose message names the unrecognised node
    type.

    :arg nodeType: identifier of the node type; any value is accepted and
        rendered with ``%s`` formatting

    :returns: the token produced by ``self.error``

    """
    # Use %-formatting rather than ``+`` so that non-string identifiers
    # (for example integer node-type codes) produce an error token
    # instead of raising TypeError during message construction.
    return self.error("Unknown node type: %s" % (nodeType,))
76
178
77
179
0 commit comments