2
2
from six import with_metaclass
3
3
4
4
import types
5
+ from collections import OrderedDict
5
6
6
7
from . import inputstream
7
8
from . import tokenizer
10
11
from .treebuilders ._base import Marker
11
12
12
13
from . import utils
13
- from . import constants
14
- from .constants import spaceCharacters , asciiUpper2Lower
15
- from .constants import specialElements
16
- from .constants import headingElements
17
- from .constants import cdataElements , rcdataElements
18
- from .constants import tokenTypes , ReparseException , namespaces
19
- from .constants import htmlIntegrationPointElements , mathmlTextIntegrationPointElements
20
- from .constants import adjustForeignAttributes as adjustForeignAttributesMap
14
+ from .constants import (
15
+ spaceCharacters , asciiUpper2Lower , specialElements , headingElements ,
16
+ cdataElements , rcdataElements , tokenTypes , tagTokenTypes , ReparseException , namespaces ,
17
+ htmlIntegrationPointElements , mathmlTextIntegrationPointElements ,
18
+ adjustForeignAttributes as adjustForeignAttributesMap , adjustSVGAttributes ,
19
+ adjustMathMLAttributes )
21
20
22
21
23
22
def parse (doc , treebuilder = "etree" , encoding = None ,
def normalizeToken(self, token):
    """HTML5 specific normalizations to the token stream.

    For start-tag tokens the attribute list in ``token["data"]`` is turned
    into an ``OrderedDict`` so attribute order is kept. When the same
    attribute name occurs more than once, the value of the FIRST occurrence
    is kept (the previous ``dict(data[::-1])`` implementation had the same
    first-wins behaviour; a plain ``OrderedDict(data)`` would let the last
    occurrence win).

    Returns the (mutated) token.
    """
    if token["type"] == tokenTypes["StartTag"]:
        raw = token["data"]
        attributes = OrderedDict(raw)
        if len(raw) > len(attributes):
            # Some attribute name was duplicated. Re-applying the pairs in
            # reverse order leaves each key holding the value of its first
            # occurrence, without changing the key order.
            attributes.update(raw[::-1])
        token["data"] = attributes

    return token
261
260
262
261
def adjustMathMLAttributes(self, token):
    """Rename MathML attributes on ``token`` in place.

    Delegates to ``adjust_attributes`` with the MathML replacement table.
    """
    # The bare name below is looked up in module scope, so it refers to the
    # replacement table imported from ``.constants`` rather than this method.
    mathml_table = adjustMathMLAttributes
    adjust_attributes(token, mathml_table)
268
263
269
264
def adjustSVGAttributes(self, token):
    """Rename SVG attributes on ``token`` in place.

    Delegates to ``adjust_attributes`` with the SVG replacement table
    (for example ``viewbox`` -> ``viewBox``).
    """
    # The bare name below is looked up in module scope, so it refers to the
    # replacement table imported from ``.constants`` rather than this method.
    svg_table = adjustSVGAttributes
    adjust_attributes(token, svg_table)
339
266
340
267
def adjustForeignAttributes(self, token):
    """Rename foreign attributes on ``token`` in place.

    Delegates to ``adjust_attributes`` with ``adjustForeignAttributesMap``,
    the table imported from ``.constants`` under that alias.
    """
    foreign_table = adjustForeignAttributesMap
    adjust_attributes(token, foreign_table)
348
269
349
270
def reparseTokenNormal (self , token ):
350
271
self .parser .phase ()
@@ -424,7 +345,7 @@ def getPhases(debug):
424
345
def log (function ):
425
346
"""Logger that records which phase processes each token"""
426
347
type_names = dict ((value , key ) for key , value in
427
- constants . tokenTypes .items ())
348
+ tokenTypes .items ())
428
349
429
350
def wrapped (self , * args , ** kwargs ):
430
351
if function .__name__ .startswith ("process" ) and len (args ) > 0 :
@@ -433,7 +354,7 @@ def wrapped(self, *args, **kwargs):
433
354
info = {"type" : type_names [token ['type' ]]}
434
355
except :
435
356
raise
436
- if token ['type' ] in constants . tagTokenTypes :
357
+ if token ['type' ] in tagTokenTypes :
437
358
info ["name" ] = token ['name' ]
438
359
439
360
self .parser .log .append ((self .parser .tokenizer .state .__name__ ,
@@ -2721,6 +2642,10 @@ def processEndTag(self, token):
2721
2642
# XXX after after frameset
2722
2643
}
2723
2644
2645
def adjust_attributes(token, replacements):
    """Rename attributes of ``token`` according to ``replacements``.

    ``token['data']`` is expected to be a mapping of attribute name to
    value. If at least one of its keys appears in ``replacements`` the
    mapping is replaced by a new ``OrderedDict`` in which every such key is
    swapped for ``replacements[key]``; values and attribute order are kept.
    If no key needs renaming, ``token['data']`` is left untouched (the same
    object remains in place).

    The token is modified in place; nothing is returned.
    """
    attributes = token['data']
    if any(name in replacements for name in attributes):
        # ``items()`` rather than ``iteritems()``: the latter only exists on
        # Python 2 dictionaries and raises AttributeError on Python 3.
        token['data'] = OrderedDict(
            (replacements.get(name, name), value)
            for name, value in attributes.items())
2724
2649
2725
2650
class ParseError (Exception ):
2726
2651
0 commit comments