1
1
from __future__ import absolute_import , division , unicode_literals
2
- from six import text_type , string_types
3
2
4
3
from xml .dom import Node
5
- from ..constants import voidElements , spaceCharacters
4
+ from ..constants import namespaces , voidElements , spaceCharacters
6
5
7
6
# Public names exported by this module: the node-type markers followed by
# the two walker base classes.
__all__ = [
    "DOCUMENT",
    "DOCTYPE",
    "TEXT",
    "ELEMENT",
    "COMMENT",
    "ENTITY",
    "UNKNOWN",
    "TreeWalker",
    "NonRecursiveTreeWalker",
]
18
17
# Rebind the imported spaceCharacters as a single joined string; text()
# passes it to str.lstrip() as the set of characters to strip.
spaceCharacters = "" .join (spaceCharacters )
19
18
20
19
21
- def to_text (s , blank_if_none = True ):
22
- """Wrapper around six.text_type to convert None to empty string"""
23
- if s is None :
24
- if blank_if_none :
25
- return ""
26
- else :
27
- return None
28
- elif isinstance (s , text_type ):
29
- return s
30
- else :
31
- return text_type (s )
32
-
33
-
34
- def is_text_or_none (string ):
35
- """Wrapper around isinstance(string_types) or is None"""
36
- return string is None or isinstance (string , string_types )
37
-
38
-
39
20
class TreeWalker (object ):
40
21
def __init__ (self , tree ):
41
22
self .tree = tree
@@ -47,47 +28,25 @@ def error(self, msg):
47
28
return {"type" : "SerializeError" , "data" : msg }
48
29
49
30
def emptyTag(self, namespace, name, attrs, hasChildren=False):
    """Yield the token(s) describing an element serialized as an empty tag.

    :arg namespace: namespace of the element, passed through unchanged
    :arg name: name of the element, passed through unchanged
    :arg attrs: attributes of the element, stored as the token's ``"data"``
    :arg hasChildren: whether the element has child nodes

    :returns: a generator yielding one ``EmptyTag`` token, followed by a
        ``SerializeError`` token (built by ``self.error``) when
        ``hasChildren`` is true
    """
    yield {"type": "EmptyTag", "name": name,
           "namespace": namespace,
           "data": attrs}
    if hasChildren:
        # An empty tag cannot carry children, so report them as an error
        # token instead of silently dropping them.
        yield self.error("Void element has children")
62
36
63
37
def startTag(self, namespace, name, attrs):
    """Build the ``StartTag`` token for an element.

    :arg namespace: namespace of the element, passed through unchanged
    :arg name: name of the element, passed through unchanged
    :arg attrs: attributes of the element; the same object is stored as
        the token's ``"data"`` (it is not copied or converted)

    :returns: a ``StartTag`` token dict
    """
    return {"type": "StartTag",
            "name": name,
            "namespace": namespace,
            "data": attrs}
77
42
78
43
def endTag(self, namespace, name):
    """Build the ``EndTag`` token for an element.

    :arg namespace: namespace of the element, passed through unchanged
    :arg name: name of the element, passed through unchanged

    :returns: an ``EndTag`` token dict with ``"type"``, ``"name"`` and
        ``"namespace"`` keys
    """
    return {"type": "EndTag",
            "name": name,
            "namespace": namespace}
86
47
87
48
def text (self , data ):
88
- assert isinstance (data , string_types ), type (data )
89
-
90
- data = to_text (data )
49
+ data = data
91
50
middle = data .lstrip (spaceCharacters )
92
51
left = data [:len (data ) - len (middle )]
93
52
if left :
@@ -101,25 +60,16 @@ def text(self, data):
101
60
yield {"type" : "SpaceCharacters" , "data" : right }
102
61
103
62
def comment(self, data):
    """Build the ``Comment`` token for a comment node.

    :arg data: the comment's content, stored unchanged as ``"data"``

    :returns: a ``Comment`` token dict
    """
    return {"type": "Comment", "data": data}
112
64
65
def doctype(self, name, publicId=None, systemId=None):
    """Build the ``Doctype`` token for a doctype node.

    :arg name: name of the doctype, passed through unchanged
    :arg publicId: public identifier, passed through unchanged
        (``None`` by default)
    :arg systemId: system identifier, passed through unchanged
        (``None`` by default)

    :returns: a ``Doctype`` token dict
    """
    return {"type": "Doctype",
            "name": name,
            "publicId": publicId,
            "systemId": systemId}
118
70
119
71
def entity(self, name):
    """Build the ``Entity`` token for an entity node.

    :arg name: name of the entity, stored unchanged as ``"name"``

    :returns: an ``Entity`` token dict
    """
    return {"type": "Entity", "name": name}
123
73
124
74
def unknown(self, nodeType):
    """Build the error token for a node whose type is not recognised.

    :arg nodeType: the unrecognised node type; it may be a string or any
        other object (for example an integer node-type constant)

    :returns: whatever ``self.error`` returns for the message
        ``"Unknown node type: <nodeType>"``
    """
    # Format rather than concatenate: ``"..." + nodeType`` raises TypeError
    # when nodeType is not a string. The one-element tuple keeps a tuple
    # nodeType from being unpacked as multiple format arguments.
    return self.error("Unknown node type: %s" % (nodeType,))
@@ -154,7 +104,7 @@ def __iter__(self):
154
104
155
105
elif type == ELEMENT :
156
106
namespace , name , attributes , hasChildren = details
157
- if name in voidElements :
107
+ if ( not namespace or namespace == namespaces [ "html" ]) and name in voidElements :
158
108
for token in self .emptyTag (namespace , name , attributes ,
159
109
hasChildren ):
160
110
yield token
@@ -187,7 +137,7 @@ def __iter__(self):
187
137
type , details = details [0 ], details [1 :]
188
138
if type == ELEMENT :
189
139
namespace , name , attributes , hasChildren = details
190
- if name not in voidElements :
140
+ if ( namespace and namespace != namespaces [ "html" ]) or name not in voidElements :
191
141
yield self .endTag (namespace , name )
192
142
if self .tree is currentNode :
193
143
currentNode = None
0 commit comments