Skip to content

Fix attribute order #275

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 15, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ Change Log
0.999999999/1.0b10
~~~~~~~~~~~~~~~~~~

Released on XXX
Released on July 15, 2016

* XXX
* Fix attribute order going to the tree builder to be document order
instead of reverse document order(!).


0.99999999/1.0b9
Expand Down
2 changes: 1 addition & 1 deletion html5lib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@
"getTreeWalker", "serialize"]

# this has to be at the top level, see how setup.py parses this
__version__ = "0.999999999-dev"
__version__ = "0.9999999999-dev"
6 changes: 5 additions & 1 deletion html5lib/html5parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,11 @@ def normalizeToken(self, token):
""" HTML5 specific normalizations to the token stream """

if token["type"] == tokenTypes["StartTag"]:
token["data"] = OrderedDict(token['data'][::-1])
raw = token["data"]
token["data"] = OrderedDict(raw)
if len(raw) > len(token["data"]):
# we had some duplicated attribute, fix so first wins
token["data"].update(raw[::-1])

return token

Expand Down
33 changes: 31 additions & 2 deletions html5lib/tests/test_parser2.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from __future__ import absolute_import, division, unicode_literals

from six import PY2, text_type
from six import PY2, text_type, unichr

import io

from . import support # noqa

from html5lib.constants import namespaces
from html5lib.constants import namespaces, tokenTypes
from html5lib import parse, parseFragment, HTMLParser


Expand Down Expand Up @@ -53,13 +53,42 @@ def test_unicode_file():
assert parse(io.StringIO("a")) is not None


def test_maintain_attribute_order():
    """Start-tag attributes must keep their original order after normalization.

    The check is made against ``HTMLParser.normalizeToken`` because the
    ordering is implemented in the parser and not in the tokenizer.
    """
    parser = HTMLParser()

    # Use many single-letter attribute names so that any hash-based
    # reordering is very likely to become visible.
    attrs = []
    for position, code_point in enumerate(range(ord('a'), ord('z'))):
        attrs.append((unichr(code_point), position))

    start_tag = {
        'name': 'html',
        'selfClosing': False,
        'selfClosingAcknowledged': False,
        'type': tokenTypes["StartTag"],
        'data': attrs,
    }
    normalized = parser.normalizeToken(start_tag)

    expected_names = [name for name, _ in attrs]
    assert list(normalized["data"].keys()) == expected_names


def test_duplicate_attribute():
    """A repeated attribute on a tag keeps the value of its first occurrence."""
    # Exercised through the parser because duplicate handling is implemented
    # there and not in the tokenizer.
    document = parse('<p class=a class=b>')
    container = document[1]
    element = container[0]
    assert element.get("class") == "a"


def test_maintain_duplicate_attribute_order():
    """A trailing duplicate attribute must not disturb the attribute order.

    The check is made against ``HTMLParser.normalizeToken`` because the
    ordering is implemented in the parser and not in the tokenizer.
    """
    parser = HTMLParser()

    attrs = []
    for position, code_point in enumerate(range(ord('a'), ord('z'))):
        attrs.append((unichr(code_point), position))

    # Repeat the first attribute name at the very end of the list.
    duplicate = ('a', len(attrs))
    start_tag = {
        'name': 'html',
        'selfClosing': False,
        'selfClosingAcknowledged': False,
        'type': tokenTypes["StartTag"],
        'data': attrs + [duplicate],
    }
    normalized = parser.normalizeToken(start_tag)

    expected_names = [name for name, _ in attrs]
    assert list(normalized["data"].keys()) == expected_names


def test_debug_log():
parser = HTMLParser(debug=True)
parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")
Expand Down