Skip to content

Commit 49f37d2

Browse files
kovidgoyal authored and gsnedders committed
Basic parsing with the new html5lib lxml tree builder works
1 parent e269a2f commit 49f37d2

File tree

2 files changed

+8
-5
lines changed

2 files changed

+8
-5
lines changed

html5lib/html5parser.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -155,8 +155,8 @@ def mainLoop(self):
155155
new_token = token
156156
while new_token is not None:
157157
currentNode = self.tree.openElements[-1] if self.tree.openElements else None
158-
currentNodeNamespace = currentNode.namespace if currentNode else None
159-
currentNodeName = currentNode.name if currentNode else None
158+
currentNodeNamespace = currentNode.namespace if currentNode is not None else None
159+
currentNodeName = currentNode.name if currentNode is not None else None
160160

161161
type = new_token["type"]
162162

@@ -472,9 +472,7 @@ def startTagHtml(self, token):
472472
self.parser.parseError("non-html-root")
473473
# XXX Need a check here to see if the first start tag token emitted is
474474
# this token... If it's not, invoke self.parser.parseError().
475-
for attr, value in token["data"].items():
476-
if attr not in self.tree.openElements[0].attributes:
477-
self.tree.openElements[0].attributes[attr] = value
475+
self.tree.apply_html_attributes(token['data'])
478476
self.parser.firstStartTag = False
479477

480478
def processEndTag(self, token):

html5lib/treebuilders/_base.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -269,6 +269,11 @@ def createElement(self, token):
269269
element.attributes = token["data"]
270270
return element
271271

272+
def apply_html_attributes(self, attrs):
273+
for attr, value in attrs.items():
274+
if attr not in self.openElements[0].attributes:
275+
self.openElements[0].attributes[attr] = value
276+
272277
def _getInsertFromTable(self):
273278
return self._insertFromTable
274279

0 commit comments

Comments
 (0)