Skip to content

Commit 14d4851

Browse files
authored
Merge pull request #275 from gsnedders/attr_order
Fix attribute order
2 parents a3b8252 + 983a935 commit 14d4851

File tree

4 files changed

+40
-6
lines changed

4 files changed

+40
-6
lines changed

CHANGES.rst

+3 -2
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,10 @@ Change Log
44
0.999999999/1.0b10
55
~~~~~~~~~~~~~~~~~~
66

7-
Released on XXX
7+
Released on July 15, 2016
88

9-
* XXX
9+
* Fix attribute order going to the tree builder to be document order
10+
instead of reverse document order(!).
1011

1112

1213
0.99999999/1.0b9

html5lib/__init__.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -22,4 +22,4 @@
2222
"getTreeWalker", "serialize"]
2323

2424
# this has to be at the top level, see how setup.py parses this
25-
__version__ = "0.999999999-dev"
25+
__version__ = "0.9999999999-dev"

html5lib/html5parser.py

+5 -1
Original file line number | Diff line number | Diff line change
@@ -265,7 +265,11 @@ def normalizeToken(self, token):
265265
""" HTML5 specific normalizations to the token stream """
266266

267267
if token["type"] == tokenTypes["StartTag"]:
268-
token["data"] = OrderedDict(token['data'][::-1])
268+
raw = token["data"]
269+
token["data"] = OrderedDict(raw)
270+
if len(raw) > len(token["data"]):
271+
# we had some duplicated attribute, fix so first wins
272+
token["data"].update(raw[::-1])
269273

270274
return token
271275

html5lib/tests/test_parser2.py

+31 -2
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,12 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3-
from six import PY2, text_type
3+
from six import PY2, text_type, unichr
44

55
import io
66

77
from . import support # noqa
88

9-
from html5lib.constants import namespaces
9+
from html5lib.constants import namespaces, tokenTypes
1010
from html5lib import parse, parseFragment, HTMLParser
1111

1212

@@ -53,13 +53,42 @@ def test_unicode_file():
5353
assert parse(io.StringIO("a")) is not None
5454

5555

56+
def test_maintain_attribute_order():
57+
# This is here because we impl it in parser and not tokenizer
58+
p = HTMLParser()
59+
# generate loads to maximize the chance a hash-based mutation will occur
60+
attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
61+
token = {'name': 'html',
62+
'selfClosing': False,
63+
'selfClosingAcknowledged': False,
64+
'type': tokenTypes["StartTag"],
65+
'data': attrs}
66+
out = p.normalizeToken(token)
67+
attr_order = list(out["data"].keys())
68+
assert attr_order == [x for x, i in attrs]
69+
70+
5671
def test_duplicate_attribute():
5772
# This is here because we impl it in parser and not tokenizer
5873
doc = parse('<p class=a class=b>')
5974
el = doc[1][0]
6075
assert el.get("class") == "a"
6176

6277

78+
def test_maintain_duplicate_attribute_order():
79+
# This is here because we impl it in parser and not tokenizer
80+
p = HTMLParser()
81+
attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
82+
token = {'name': 'html',
83+
'selfClosing': False,
84+
'selfClosingAcknowledged': False,
85+
'type': tokenTypes["StartTag"],
86+
'data': attrs + [('a', len(attrs))]}
87+
out = p.normalizeToken(token)
88+
attr_order = list(out["data"].keys())
89+
assert attr_order == [x for x, i in attrs]
90+
91+
6392
def test_debug_log():
6493
parser = HTMLParser(debug=True)
6594
parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")

0 commit comments

Comments
 (0)