Skip to content

Commit db0b5a0

Browse files
kovidgoyal authored and gsnedders committed
Preserve attribute order when parsing
1 parent cc9f28a commit db0b5a0

File tree

2 files changed

+84
-92
lines changed

2 files changed

+84
-92
lines changed

html5lib/constants.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,73 @@
433433
(namespaces["mathml"], "mtext")
434434
))
435435

436+
adjustSVGAttributes = {
437+
"attributename": "attributeName",
438+
"attributetype": "attributeType",
439+
"basefrequency": "baseFrequency",
440+
"baseprofile": "baseProfile",
441+
"calcmode": "calcMode",
442+
"clippathunits": "clipPathUnits",
443+
"contentscripttype": "contentScriptType",
444+
"contentstyletype": "contentStyleType",
445+
"diffuseconstant": "diffuseConstant",
446+
"edgemode": "edgeMode",
447+
"externalresourcesrequired": "externalResourcesRequired",
448+
"filterres": "filterRes",
449+
"filterunits": "filterUnits",
450+
"glyphref": "glyphRef",
451+
"gradienttransform": "gradientTransform",
452+
"gradientunits": "gradientUnits",
453+
"kernelmatrix": "kernelMatrix",
454+
"kernelunitlength": "kernelUnitLength",
455+
"keypoints": "keyPoints",
456+
"keysplines": "keySplines",
457+
"keytimes": "keyTimes",
458+
"lengthadjust": "lengthAdjust",
459+
"limitingconeangle": "limitingConeAngle",
460+
"markerheight": "markerHeight",
461+
"markerunits": "markerUnits",
462+
"markerwidth": "markerWidth",
463+
"maskcontentunits": "maskContentUnits",
464+
"maskunits": "maskUnits",
465+
"numoctaves": "numOctaves",
466+
"pathlength": "pathLength",
467+
"patterncontentunits": "patternContentUnits",
468+
"patterntransform": "patternTransform",
469+
"patternunits": "patternUnits",
470+
"pointsatx": "pointsAtX",
471+
"pointsaty": "pointsAtY",
472+
"pointsatz": "pointsAtZ",
473+
"preservealpha": "preserveAlpha",
474+
"preserveaspectratio": "preserveAspectRatio",
475+
"primitiveunits": "primitiveUnits",
476+
"refx": "refX",
477+
"refy": "refY",
478+
"repeatcount": "repeatCount",
479+
"repeatdur": "repeatDur",
480+
"requiredextensions": "requiredExtensions",
481+
"requiredfeatures": "requiredFeatures",
482+
"specularconstant": "specularConstant",
483+
"specularexponent": "specularExponent",
484+
"spreadmethod": "spreadMethod",
485+
"startoffset": "startOffset",
486+
"stddeviation": "stdDeviation",
487+
"stitchtiles": "stitchTiles",
488+
"surfacescale": "surfaceScale",
489+
"systemlanguage": "systemLanguage",
490+
"tablevalues": "tableValues",
491+
"targetx": "targetX",
492+
"targety": "targetY",
493+
"textlength": "textLength",
494+
"viewbox": "viewBox",
495+
"viewtarget": "viewTarget",
496+
"xchannelselector": "xChannelSelector",
497+
"ychannelselector": "yChannelSelector",
498+
"zoomandpan": "zoomAndPan"
499+
}
500+
501+
adjustMathMLAttributes = {"definitionurl": "definitionURL"}
502+
436503
adjustForeignAttributes = {
437504
"xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
438505
"xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),

html5lib/html5parser.py

Lines changed: 17 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from six import with_metaclass
33

44
import types
5+
from collections import OrderedDict
56

67
from . import inputstream
78
from . import tokenizer
@@ -10,14 +11,12 @@
1011
from .treebuilders._base import Marker
1112

1213
from . import utils
13-
from . import constants
14-
from .constants import spaceCharacters, asciiUpper2Lower
15-
from .constants import specialElements
16-
from .constants import headingElements
17-
from .constants import cdataElements, rcdataElements
18-
from .constants import tokenTypes, ReparseException, namespaces
19-
from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
20-
from .constants import adjustForeignAttributes as adjustForeignAttributesMap
14+
from .constants import (
15+
spaceCharacters, asciiUpper2Lower, specialElements, headingElements,
16+
cdataElements, rcdataElements, tokenTypes, tagTokenTypes, ReparseException, namespaces,
17+
htmlIntegrationPointElements, mathmlTextIntegrationPointElements,
18+
adjustForeignAttributes as adjustForeignAttributesMap, adjustSVGAttributes,
19+
adjustMathMLAttributes)
2120

2221

2322
def parse(doc, treebuilder="etree", encoding=None,
@@ -255,96 +254,18 @@ def normalizeToken(self, token):
255254
""" HTML5 specific normalizations to the token stream """
256255

257256
if token["type"] == tokenTypes["StartTag"]:
258-
token["data"] = dict(token["data"][::-1])
257+
token["data"] = OrderedDict(token['data'])
259258

260259
return token
261260

262261
def adjustMathMLAttributes(self, token):
263-
replacements = {"definitionurl": "definitionURL"}
264-
for k, v in replacements.items():
265-
if k in token["data"]:
266-
token["data"][v] = token["data"][k]
267-
del token["data"][k]
262+
adjust_attributes(token, adjustMathMLAttributes)
268263

269264
def adjustSVGAttributes(self, token):
270-
replacements = {
271-
"attributename": "attributeName",
272-
"attributetype": "attributeType",
273-
"basefrequency": "baseFrequency",
274-
"baseprofile": "baseProfile",
275-
"calcmode": "calcMode",
276-
"clippathunits": "clipPathUnits",
277-
"contentscripttype": "contentScriptType",
278-
"contentstyletype": "contentStyleType",
279-
"diffuseconstant": "diffuseConstant",
280-
"edgemode": "edgeMode",
281-
"externalresourcesrequired": "externalResourcesRequired",
282-
"filterres": "filterRes",
283-
"filterunits": "filterUnits",
284-
"glyphref": "glyphRef",
285-
"gradienttransform": "gradientTransform",
286-
"gradientunits": "gradientUnits",
287-
"kernelmatrix": "kernelMatrix",
288-
"kernelunitlength": "kernelUnitLength",
289-
"keypoints": "keyPoints",
290-
"keysplines": "keySplines",
291-
"keytimes": "keyTimes",
292-
"lengthadjust": "lengthAdjust",
293-
"limitingconeangle": "limitingConeAngle",
294-
"markerheight": "markerHeight",
295-
"markerunits": "markerUnits",
296-
"markerwidth": "markerWidth",
297-
"maskcontentunits": "maskContentUnits",
298-
"maskunits": "maskUnits",
299-
"numoctaves": "numOctaves",
300-
"pathlength": "pathLength",
301-
"patterncontentunits": "patternContentUnits",
302-
"patterntransform": "patternTransform",
303-
"patternunits": "patternUnits",
304-
"pointsatx": "pointsAtX",
305-
"pointsaty": "pointsAtY",
306-
"pointsatz": "pointsAtZ",
307-
"preservealpha": "preserveAlpha",
308-
"preserveaspectratio": "preserveAspectRatio",
309-
"primitiveunits": "primitiveUnits",
310-
"refx": "refX",
311-
"refy": "refY",
312-
"repeatcount": "repeatCount",
313-
"repeatdur": "repeatDur",
314-
"requiredextensions": "requiredExtensions",
315-
"requiredfeatures": "requiredFeatures",
316-
"specularconstant": "specularConstant",
317-
"specularexponent": "specularExponent",
318-
"spreadmethod": "spreadMethod",
319-
"startoffset": "startOffset",
320-
"stddeviation": "stdDeviation",
321-
"stitchtiles": "stitchTiles",
322-
"surfacescale": "surfaceScale",
323-
"systemlanguage": "systemLanguage",
324-
"tablevalues": "tableValues",
325-
"targetx": "targetX",
326-
"targety": "targetY",
327-
"textlength": "textLength",
328-
"viewbox": "viewBox",
329-
"viewtarget": "viewTarget",
330-
"xchannelselector": "xChannelSelector",
331-
"ychannelselector": "yChannelSelector",
332-
"zoomandpan": "zoomAndPan"
333-
}
334-
for originalName in list(token["data"].keys()):
335-
if originalName in replacements:
336-
svgName = replacements[originalName]
337-
token["data"][svgName] = token["data"][originalName]
338-
del token["data"][originalName]
265+
adjust_attributes(token, adjustSVGAttributes)
339266

340267
def adjustForeignAttributes(self, token):
341-
replacements = adjustForeignAttributesMap
342-
343-
for originalName in token["data"].keys():
344-
if originalName in replacements:
345-
foreignName = replacements[originalName]
346-
token["data"][foreignName] = token["data"][originalName]
347-
del token["data"][originalName]
268+
adjust_attributes(token, adjustForeignAttributesMap)
348269

349270
def reparseTokenNormal(self, token):
350271
self.parser.phase()
@@ -424,7 +345,7 @@ def getPhases(debug):
424345
def log(function):
425346
"""Logger that records which phase processes each token"""
426347
type_names = dict((value, key) for key, value in
427-
constants.tokenTypes.items())
348+
tokenTypes.items())
428349

429350
def wrapped(self, *args, **kwargs):
430351
if function.__name__.startswith("process") and len(args) > 0:
@@ -433,7 +354,7 @@ def wrapped(self, *args, **kwargs):
433354
info = {"type": type_names[token['type']]}
434355
except:
435356
raise
436-
if token['type'] in constants.tagTokenTypes:
357+
if token['type'] in tagTokenTypes:
437358
info["name"] = token['name']
438359

439360
self.parser.log.append((self.parser.tokenizer.state.__name__,
@@ -2721,6 +2642,10 @@ def processEndTag(self, token):
27212642
# XXX after after frameset
27222643
}
27232644

2645+
def adjust_attributes(token, replacements):
    """Rename attributes of *token* in place according to *replacements*.

    ``token['data']`` is the mapping of attribute name -> value for a tag
    token.  Every attribute whose name is a key of ``replacements`` is
    renamed to the corresponding value; all other attributes are kept
    untouched.  The relative order of the attributes is preserved, which a
    delete-and-reinsert approach would not do.

    If no attribute needs renaming, ``token['data']`` is left as the very
    same object.  Otherwise it is rebound to a new ``OrderedDict``.

    :arg token: token dict with a ``'data'`` mapping of attributes
    :arg replacements: mapping of original attribute name -> adjusted name
    :returns: ``None``; the token is modified in place
    """
    attributes = token['data']
    # Short-circuiting scan instead of intersecting two freshly built
    # frozensets: most tokens carry only a handful of attributes, while
    # the replacement tables can be large.
    if any(name in replacements for name in attributes):
        # ``items()`` rather than ``iteritems()``: the latter does not exist
        # on Python 3 dictionaries and raised AttributeError there.
        token['data'] = OrderedDict(
            (replacements.get(name, name), value)
            for name, value in attributes.items())
27242649

27252650
class ParseError(Exception):
27262651

0 commit comments

Comments
 (0)