Skip to content

Update pytest #497

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .pytest.expect
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
pytest-expect file v1
(2, 7, 11, 'final', 0)
b'html5lib/tests/test_encoding.py::test_encoding::[110]': FAIL
b'html5lib/tests/test_encoding.py::test_encoding::[111]': FAIL
(2, 7, 18, 'final', 0)
b'html5lib/tests/test_encoding.py::test_parser_encoding[<!DOCTYPE HTML>\\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
b'html5lib/tests/test_encoding.py::test_prescan_encoding[<!DOCTYPE HTML>\\n<script>document.write(\'<meta charset="ISO-8859-\' + \'2">\')</script>-iso-8859-2]': FAIL
u'html5lib/tests/testdata/tokenizer/test2.test::0::dataState': FAIL
u'html5lib/tests/testdata/tokenizer/test3.test::228::dataState': FAIL
u'html5lib/tests/testdata/tokenizer/test3.test::231::dataState': FAIL
Expand Down
21 changes: 11 additions & 10 deletions html5lib/tests/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,15 @@ def test_parser_args_raises(kwargs):
assert exc_info.value.args[0].startswith("Cannot set an encoding with a unicode input")


def runParserEncodingTest(data, encoding):
def param_encoding():
for filename in get_data_files("encoding"):
tests = _TestData(filename, b"data", encoding=None)
for test in tests:
yield test[b'data'], test[b'encoding']


@pytest.mark.parametrize("data, encoding", param_encoding())
def test_parser_encoding(data, encoding):
p = HTMLParser()
assert p.documentEncoding is None
p.parse(data, useChardet=False)
Expand All @@ -84,7 +92,8 @@ def runParserEncodingTest(data, encoding):
assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)


def runPreScanEncodingTest(data, encoding):
@pytest.mark.parametrize("data, encoding", param_encoding())
def test_prescan_encoding(data, encoding):
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
encoding = encoding.lower().decode("ascii")

Expand All @@ -95,14 +104,6 @@ def runPreScanEncodingTest(data, encoding):
assert encoding == stream.charEncoding[0].name, errorMessage(data, encoding, stream.charEncoding[0].name)


def test_encoding():
for filename in get_data_files("encoding"):
tests = _TestData(filename, b"data", encoding=None)
for test in tests:
yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])


# pylint:disable=wrong-import-position
try:
import chardet # noqa
Expand Down
45 changes: 25 additions & 20 deletions html5lib/tests/test_sanitizer.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,11 @@
from __future__ import absolute_import, division, unicode_literals

import pytest

from html5lib import constants, parseFragment, serialize
from html5lib.filters import sanitizer


def runSanitizerTest(_, expected, input):
parsed = parseFragment(expected)
expected = serialize(parsed,
omit_optional_tags=False,
use_trailing_solidus=True,
space_before_trailing_solidus=False,
quote_attr_values="always",
quote_char='"',
alphabetical_attributes=True)
assert expected == sanitize_html(input)


def sanitize_html(stream):
parsed = parseFragment(stream)
serialized = serialize(parsed,
Expand Down Expand Up @@ -59,27 +49,27 @@ def test_data_uri_disallowed_type():
assert expected == sanitized


def test_sanitizer():
def param_sanitizer():
for ns, tag_name in sanitizer.allowed_elements:
if ns != constants.namespaces["html"]:
continue
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td',
'tfoot', 'th', 'thead', 'tr', 'select']:
continue # TODO
if tag_name == 'image':
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
yield ("test_should_allow_%s_tag" % tag_name,
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
elif tag_name == 'br':
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
yield ("test_should_allow_%s_tag" % tag_name,
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
elif tag_name in constants.voidElements:
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
yield ("test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
else:
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
yield ("test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))

Expand All @@ -93,15 +83,15 @@ def test_sanitizer():
attribute_value = 'foo'
if attribute_name in sanitizer.attr_val_is_uri:
attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
yield ("test_should_allow_%s_attribute" % attribute_name,
"<p %s=\"%s\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % (attribute_name, attribute_value),
"<p %s='%s'>foo <bad>bar</bad> baz</p>" % (attribute_name, attribute_value))

for protocol in sanitizer.allowed_protocols:
rest_of_uri = '//sub.domain.tld/path/object.ext'
if protocol == 'data':
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
yield ("test_should_allow_uppercase_%s_uris" % protocol,
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))

Expand All @@ -110,11 +100,26 @@ def test_sanitizer():
if protocol == 'data':
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
protocol = protocol.upper()
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
yield ("test_should_allow_uppercase_%s_uris" % protocol,
"<img src=\"%s:%s\">foo</a>" % (protocol, rest_of_uri),
"""<img src="%s:%s">foo</a>""" % (protocol, rest_of_uri))


@pytest.mark.parametrize("expected, input",
(pytest.param(expected, input, id=id)
for id, expected, input in param_sanitizer()))
def test_sanitizer(expected, input):
parsed = parseFragment(expected)
expected = serialize(parsed,
omit_optional_tags=False,
use_trailing_solidus=True,
space_before_trailing_solidus=False,
quote_attr_values="always",
quote_char='"',
alphabetical_attributes=True)
assert expected == sanitize_html(input)


def test_lowercase_color_codes_in_style():
sanitized = sanitize_html("<p style=\"border: 1px solid #a2a2a2;\"></p>")
expected = '<p style=\"border: 1px solid #a2a2a2;\"></p>'
Expand Down
49 changes: 25 additions & 24 deletions html5lib/tests/test_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,19 +89,6 @@ def serialize_html(input, options):
return serializer.render(stream, encoding)


def runSerializerTest(input, expected, options):
encoding = options.get("encoding", None)

if encoding:
expected = list(map(lambda x: x.encode(encoding), expected))

result = serialize_html(input, options)
if len(expected) == 1:
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
elif result not in expected:
assert False, "Expected: %s, Received: %s" % (expected, result)


def throwsWithLatin1(input):
with pytest.raises(UnicodeEncodeError):
serialize_html(input, {"encoding": "iso-8859-1"})
Expand All @@ -120,13 +107,13 @@ def testDoctypeSystemId():


def testCdataCharacters():
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
["<style>&amacr;"], {"encoding": "iso-8859-1"})
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
["<style>&amacr;"], {"encoding": "iso-8859-1"})


def testCharacters():
runSerializerTest([["Characters", "\u0101"]],
["&amacr;"], {"encoding": "iso-8859-1"})
test_serializer([["Characters", "\u0101"]],
["&amacr;"], {"encoding": "iso-8859-1"})


def testStartTagName():
Expand All @@ -138,9 +125,9 @@ def testAttributeName():


def testAttributeValue():
runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span",
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})
test_serializer([["StartTag", "http://www.w3.org/1999/xhtml", "span",
[{"namespace": None, "name": "potato", "value": "\u0101"}]]],
["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})


def testEndTagName():
Expand All @@ -165,7 +152,7 @@ def testSpecQuoteAttribute(c):
else:
output_ = ['<span foo="%s">' % c]
options_ = {"quote_attr_values": "spec"}
runSerializerTest(input_, output_, options_)
test_serializer(input_, output_, options_)


@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
Expand All @@ -184,7 +171,7 @@ def testLegacyQuoteAttribute(c):
else:
output_ = ['<span foo="%s">' % c]
options_ = {"quote_attr_values": "legacy"}
runSerializerTest(input_, output_, options_)
test_serializer(input_, output_, options_)


@pytest.fixture
Expand Down Expand Up @@ -217,9 +204,23 @@ def testEntityNoResolve(lxml_parser):
assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>'


def test_serializer():
def param_serializer():
for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)):
with open(filename) as fp:
tests = json.load(fp)
for test in tests['tests']:
yield runSerializerTest, test["input"], test["expected"], test.get("options", {})
yield test["input"], test["expected"], test.get("options", {})


@pytest.mark.parametrize("input, expected, options", param_serializer())
def test_serializer(input, expected, options):
encoding = options.get("encoding", None)

if encoding:
expected = list(map(lambda x: x.encode(encoding), expected))

result = serialize_html(input, options)
if len(expected) == 1:
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
elif result not in expected:
assert False, "Expected: %s, Received: %s" % (expected, result)
39 changes: 20 additions & 19 deletions html5lib/tests/test_treewalkers.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,24 +61,7 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
setter['ElementTree'](docfrag)(name, value)


def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
"""tests what happens when we add attributes to the intext"""
treeName, treeClass = tree
if treeClass is None:
pytest.skip("Treebuilder not loaded")
parser = html5parser.HTMLParser(tree=treeClass["builder"])
document = parser.parseFragment(intext)
for nom, val in attrs_to_add:
set_attribute_on_first_child(document, nom, val, treeName)

document = treeClass.get("adapter", lambda x: x)(document)
output = treewalkers.pprint(treeClass["walker"](document))
output = attrlist.sub(sortattrs, output)
if output not in expected:
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))


def test_treewalker_six_mix():
def param_treewalker_six_mix():
"""Str/Unicode mix. If str attrs added to tree"""

# On Python 2.x string literals are of type str. Unless, like this
Expand All @@ -99,7 +82,25 @@ def test_treewalker_six_mix():

for tree in sorted(treeTypes.items()):
for intext, attrs, expected in sm_tests:
yield runTreewalkerEditTest, intext, expected, attrs, tree
yield intext, expected, attrs, tree


@pytest.mark.parametrize("intext, expected, attrs_to_add, tree", param_treewalker_six_mix())
def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
"""tests what happens when we add attributes to the intext"""
treeName, treeClass = tree
if treeClass is None:
pytest.skip("Treebuilder not loaded")
parser = html5parser.HTMLParser(tree=treeClass["builder"])
document = parser.parseFragment(intext)
for nom, val in attrs_to_add:
set_attribute_on_first_child(document, nom, val, treeName)

document = treeClass.get("adapter", lambda x: x)(document)
output = treewalkers.pprint(treeClass["walker"](document))
output = attrlist.sub(sortattrs, output)
if output not in expected:
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))


@pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))
Expand Down
12 changes: 6 additions & 6 deletions html5lib/tests/tree_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ def _getParserTests(self, treeName, treeAPIs):
item.add_marker(pytest.mark.parser)
if namespaceHTMLElements:
item.add_marker(pytest.mark.namespaced)
if treeAPIs is None:
item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
yield item

def _getTreeWalkerTests(self, treeName, treeAPIs):
Expand All @@ -69,8 +67,6 @@ def _getTreeWalkerTests(self, treeName, treeAPIs):
treeAPIs)
item.add_marker(getattr(pytest.mark, treeName))
item.add_marker(pytest.mark.treewalker)
if treeAPIs is None:
item.add_marker(pytest.mark.skipif(True, reason="Treebuilder not loaded"))
yield item


Expand All @@ -84,12 +80,14 @@ def convertTreeDump(data):
class ParserTest(pytest.Item):
def __init__(self, name, parent, test, treeClass, namespaceHTMLElements):
super(ParserTest, self).__init__(name, parent)
self.obj = lambda: 1 # this is to hack around skipif needing a function!
self.test = test
self.treeClass = treeClass
self.namespaceHTMLElements = namespaceHTMLElements

def runtest(self):
if self.treeClass is None:
pytest.skip("Treebuilder not loaded")

p = html5parser.HTMLParser(tree=self.treeClass,
namespaceHTMLElements=self.namespaceHTMLElements)

Expand Down Expand Up @@ -147,11 +145,13 @@ def repr_failure(self, excinfo):
class TreeWalkerTest(pytest.Item):
def __init__(self, name, parent, test, treeAPIs):
super(TreeWalkerTest, self).__init__(name, parent)
self.obj = lambda: 1 # this is to hack around skipif needing a function!
self.test = test
self.treeAPIs = treeAPIs

def runtest(self):
if self.treeAPIs is None:
pytest.skip("Treebuilder not loaded")

p = html5parser.HTMLParser(tree=self.treeAPIs["builder"])

input = self.test['data']
Expand Down
Loading