Skip to content

Commit 5d16169

Browse files
committed
Speed up setting attributes on etree implementations
1 parent 4cb98f5 commit 5d16169

File tree

2 files changed

+51
-30
lines changed

2 files changed

+51
-30
lines changed

html5lib/treebuilders/etree.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
import re
77

8+
from copy import copy
9+
810
from . import base
911
from .. import _ihatexml
1012
from .. import constants
@@ -61,16 +63,17 @@ def _getAttributes(self):
6163
return self._element.attrib
6264

6365
def _setAttributes(self, attributes):
64-
# Delete existing attributes first
65-
# XXX - there may be a better way to do this...
66-
for key in list(self._element.attrib.keys()):
67-
del self._element.attrib[key]
68-
for key, value in attributes.items():
69-
if isinstance(key, tuple):
70-
name = "{%s}%s" % (key[2], key[1])
71-
else:
72-
name = key
73-
self._element.set(name, value)
66+
el_attrib = self._element.attrib
67+
el_attrib.clear()
68+
if attributes:
69+
# calling .items _always_ allocates, and the above truthy check is cheaper than the
70+
# allocation on average
71+
for key, value in attributes.items():
72+
if isinstance(key, tuple):
73+
name = "{%s}%s" % (key[2], key[1])
74+
else:
75+
name = key
76+
el_attrib[name] = value
7477

7578
attributes = property(_getAttributes, _setAttributes)
7679

@@ -129,8 +132,8 @@ def insertText(self, data, insertBefore=None):
129132

130133
def cloneNode(self):
131134
element = type(self)(self.name, self.namespace)
132-
for name, value in self.attributes.items():
133-
element.attributes[name] = value
135+
if self._element.attrib:
136+
element._element.attrib = copy(self._element.attrib)
134137
return element
135138

136139
def reparentChildren(self, newParent):

html5lib/treebuilders/etree_lxml.py

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@
1616
import re
1717
import sys
1818

19+
try:
20+
from collections.abc import MutableMapping
21+
except ImportError:
22+
from collections import MutableMapping
23+
1924
from . import base
2025
from ..constants import DataLossWarning
2126
from .. import constants
@@ -185,26 +190,34 @@ def __init__(self, namespaceHTMLElements, fullTree=False):
185190
infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
186191
self.namespaceHTMLElements = namespaceHTMLElements
187192

188-
class Attributes(dict):
189-
def __init__(self, element, value=None):
190-
if value is None:
191-
value = {}
193+
class Attributes(MutableMapping):
194+
def __init__(self, element):
192195
self._element = element
193-
dict.__init__(self, value) # pylint:disable=non-parent-init-called
194-
for key, value in self.items():
195-
if isinstance(key, tuple):
196-
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
197-
else:
198-
name = infosetFilter.coerceAttribute(key)
199-
self._element._element.attrib[name] = value
200196

201-
def __setitem__(self, key, value):
202-
dict.__setitem__(self, key, value)
197+
def _coerceKey(self, key):
203198
if isinstance(key, tuple):
204199
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
205200
else:
206201
name = infosetFilter.coerceAttribute(key)
207-
self._element._element.attrib[name] = value
202+
return name
203+
204+
def __getitem__(self, key):
205+
return self._element._element.attrib[self._coerceKey(key)]
206+
207+
def __setitem__(self, key, value):
208+
self._element._element.attrib[self._coerceKey(key)] = value
209+
210+
def __delitem__(self, key):
211+
del self._element._element.attrib[self._coerceKey(key)]
212+
213+
def __iter__(self):
214+
return iter(self._element._element.attrib)
215+
216+
def __len__(self):
217+
return len(self._element._element.attrib)
218+
219+
def clear(self):
220+
return self._element._element.attrib.clear()
208221

209222
class Element(builder.Element):
210223
def __init__(self, name, namespace):
@@ -225,17 +238,22 @@ def _getName(self):
225238
def _getAttributes(self):
226239
return self._attributes
227240

228-
def _setAttributes(self, attributes):
229-
self._attributes = Attributes(self, attributes)
241+
def _setAttributes(self, value):
242+
attributes = self.attributes
243+
attributes.clear()
244+
attributes.update(value)
230245

231246
attributes = property(_getAttributes, _setAttributes)
232247

233248
def insertText(self, data, insertBefore=None):
234249
data = infosetFilter.coerceCharacters(data)
235250
builder.Element.insertText(self, data, insertBefore)
236251

237-
def appendChild(self, child):
238-
builder.Element.appendChild(self, child)
252+
def cloneNode(self):
253+
element = type(self)(self.name, self.namespace)
254+
if self._element.attrib:
255+
element._element.attrib.update(self._element.attrib)
256+
return element
239257

240258
class Comment(builder.Comment):
241259
def __init__(self, data):

0 commit comments

Comments
 (0)