From c64bfca788e2f858d336105eb7da18399a7894c6 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Thu, 3 Dec 2015 18:21:33 +0000 Subject: [PATCH 01/27] Get rid of mutable default arguments --- html5lib/html5parser.py | 4 +++- html5lib/treebuilders/etree_lxml.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index e6808425..43c1dc61 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -259,8 +259,10 @@ def parseFragment(self, stream, container="div", encoding=None, encoding=encoding, scripting=scripting) return self.tree.getFragment() - def parseError(self, errorcode="XXX-undefined-error", datavars={}): + def parseError(self, errorcode="XXX-undefined-error", datavars=None): # XXX The idea is to make errorcode mandatory. + if datavars is None: + datavars = {} self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) if self.strict: raise ParseError(E[errorcode] % datavars) diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index 138b30bd..79a4d4c5 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -193,7 +193,9 @@ def __init__(self, namespaceHTMLElements, fullTree=False): self.namespaceHTMLElements = namespaceHTMLElements class Attributes(dict): - def __init__(self, element, value={}): + def __init__(self, element, value=None): + if value is None: + value = {} self._element = element dict.__init__(self, value) for key, value in self.items(): From c1c16ceed7ff484b6ce056b0a377404aa06e01f7 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Fri, 4 Dec 2015 02:14:15 +0000 Subject: [PATCH 02/27] Avoid noisiness from pylint and the parser's set patterns --- html5lib/html5parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 43c1dc61..aad6a059 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -460,6 
+460,7 @@ def getMetaclass(use_metaclass, metaclass_func): else: return type + # pylint:disable=unused-argument class Phase(with_metaclass(getMetaclass(debug, log))): """Base class for helper object that implements each phase of processing """ @@ -2765,6 +2766,7 @@ def startTagOther(self, token): def processEndTag(self, token): self.parser.parseError("expected-eof-but-got-end-tag", {"name": token["name"]}) + # pylint:enable=unused-argument return { "initial": InitialPhase, From 2c3b64b0b9cbd7ffcd67f3ddae93a0a8d75af908 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Fri, 20 May 2016 17:23:40 +0100 Subject: [PATCH 03/27] add pep8/flake8 config to get something useful happening with them this makes run-flake8.sh just call flake8 simply --- flake8-run.sh | 7 ++----- html5lib/filters/sanitizer.py | 2 +- html5lib/ihatexml.py | 4 ++-- html5lib/inputstream.py | 2 +- html5lib/tests/test_sanitizer.py | 3 ++- setup.cfg | 9 +++++++++ 6 files changed, 17 insertions(+), 10 deletions(-) diff --git a/flake8-run.sh b/flake8-run.sh index 685ec6ab..b175ec80 100755 --- a/flake8-run.sh +++ b/flake8-run.sh @@ -5,8 +5,5 @@ if [[ ! -x $(which flake8) ]]; then exit 1 fi -find html5lib/ -name '*.py' -and -not -name 'constants.py' -print0 | xargs -0 flake8 --ignore=E501 -flake1=$? -flake8 --max-line-length=99 --ignore=E126 html5lib/constants.py -flake2=$? -exit $[$flake1 || $flake2] +flake8 html5lib +exit $? 
diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index caddd318..fdd4181d 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -862,7 +862,7 @@ def sanitize_css(self, style): 'padding']: for keyword in value.split(): if keyword not in self.allowed_css_keywords and \ - not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): + not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa break else: clean.append(prop + ': ' + value + ';') diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py index 5da5d938..57fec9d6 100644 --- a/html5lib/ihatexml.py +++ b/html5lib/ihatexml.py @@ -175,9 +175,9 @@ def escapeRegexp(string): return string # output from the above -nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\
u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') +nonXmlNameBMPRegexp = 
re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48
\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa -nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u
0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') +nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0
c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa # Simpler things nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]") diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py index 15acba0d..5cfc2cc5 100644 --- a/html5lib/inputstream.py +++ b/html5lib/inputstream.py @@ -32,7 +32,7 @@ class BufferedIOBase(object): spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) -invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" +invalid_unicode_no_surrogate = 
"[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa if utils.supports_lone_surrogates: # Use one extra step of indirection and create surrogates with diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py index 1f8a06f6..9f8ae22c 100644 --- a/html5lib/tests/test_sanitizer.py +++ b/html5lib/tests/test_sanitizer.py @@ -63,7 +63,8 @@ def test_sanitizer(): for ns, tag_name in sanitizer.allowed_elements: if ns != constants.namespaces["html"]: continue - if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'select']: + if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', + 'tfoot', 'th', 'thead', 'tr', 'select']: continue # TODO if tag_name == 'image': yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name, diff --git a/setup.cfg b/setup.cfg index 2a9acf13..3152ac54 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,11 @@ [bdist_wheel] universal = 1 + +[pep8] +ignore = N +max-line-length = 139 +exclude = .git,__pycache__,.tox,doc + +[flake8] +ignore = N +max-line-length = 139 From 823864882ee969ebb7c16986a80388d5785cb9ea Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Fri, 20 May 2016 17:37:55 +0100 Subject: [PATCH 04/27] Fix all the files outside of html5lib to flake8 cleanly --- flake8-run.sh | 2 +- parse.py | 31 +++++++++++++++++------------ setup.py | 12 ++++++------ utils/entities.py | 50 +++++++++++++++++++++++++++++------------------ utils/spider.py | 43 +++++++++++++++++++++------------------- 5 files changed, 80 insertions(+), 58 deletions(-) diff --git a/flake8-run.sh 
b/flake8-run.sh index b175ec80..d9264946 100755 --- a/flake8-run.sh +++ b/flake8-run.sh @@ -5,5 +5,5 @@ if [[ ! -x $(which flake8) ]]; then exit 1 fi -flake8 html5lib +flake8 `dirname $0` exit $? diff --git a/parse.py b/parse.py index cceea84d..2ed8f1c2 100755 --- a/parse.py +++ b/parse.py @@ -5,7 +5,6 @@ """ import sys -import os import traceback from optparse import OptionParser @@ -15,9 +14,10 @@ from html5lib import constants from html5lib import utils + def parse(): optParser = getOptParser() - opts,args = optParser.parse_args() + opts, args = optParser.parse_args() encoding = "utf8" try: @@ -25,7 +25,10 @@ def parse(): # Try opening from the internet if f.startswith('http://'): try: - import urllib.request, urllib.parse, urllib.error, cgi + import urllib.request + import urllib.parse + import urllib.error + import cgi f = urllib.request.urlopen(f) contentType = f.headers.get('content-type') if contentType: @@ -41,7 +44,7 @@ def parse(): try: # Try opening from file system f = open(f, "rb") - except IOError as e: + except IOError as e: sys.stderr.write("Unable to open file: %s\n" % e) sys.exit(1) except IndexError: @@ -82,14 +85,15 @@ def parse(): if document: printOutput(p, document, opts) t2 = time.time() - sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)"%(t1-t0, t2-t1)) + sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)" % (t1 - t0, t2 - t1)) else: - sys.stderr.write("\n\nRun took: %fs"%(t1-t0)) + sys.stderr.write("\n\nRun took: %fs" % (t1 - t0)) else: document = run(parseMethod, f, encoding, opts.scripting) if document: printOutput(p, document, opts) + def run(parseMethod, f, encoding, scripting): try: document = parseMethod(f, encoding=encoding, scripting=scripting) @@ -98,6 +102,7 @@ def run(parseMethod, f, encoding, scripting): traceback.print_exc() return document + def printOutput(parser, document, opts): if opts.encoding: print("Encoding:", parser.tokenizer.stream.charEncoding) @@ -116,7 +121,7 @@ def 
printOutput(parser, document, opts): elif tb == "etree": sys.stdout.write(utils.default_etree.tostring(document)) elif opts.tree: - if not hasattr(document,'__getitem__'): + if not hasattr(document, '__getitem__'): document = [document] for fragment in document: print(parser.tree.testSerializer(fragment)) @@ -126,7 +131,7 @@ def printOutput(parser, document, opts): kwargs = {} for opt in serializer.HTMLSerializer.options: try: - kwargs[opt] = getattr(opts,opt) + kwargs[opt] = getattr(opts, opt) except: pass if not kwargs['quote_char']: @@ -142,12 +147,14 @@ def printOutput(parser, document, opts): encoding = "utf-8" for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=encoding): sys.stdout.write(text) - if not text.endswith('\n'): sys.stdout.write('\n') + if not text.endswith('\n'): + sys.stdout.write('\n') if opts.error: - errList=[] + errList = [] for pos, errorcode, datavars in parser.errors: - errList.append("Line %i Col %i"%pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars) - sys.stdout.write("\nParse errors:\n" + "\n".join(errList)+"\n") + errList.append("Line %i Col %i" % pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars) + sys.stdout.write("\nParse errors:\n" + "\n".join(errList) + "\n") + def getOptParser(): parser = OptionParser(usage=__doc__) diff --git a/setup.py b/setup.py index b6ea24af..b42ba400 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ from setuptools import setup -classifiers=[ +classifiers = [ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', 'License :: OSI Approved :: MIT License', @@ -20,9 +20,9 @@ 'Programming Language :: Python :: 3.5', 'Topic :: Software Development :: Libraries :: Python Modules', 'Topic :: Text Processing :: Markup :: HTML' - ] +] -packages = ['html5lib'] + ['html5lib.'+name +packages = ['html5lib'] + ['html5lib.' 
+ name for name in os.listdir(os.path.join('html5lib')) if os.path.isdir(os.path.join('html5lib', name)) and not name.startswith('.') and name != 'tests'] @@ -39,9 +39,9 @@ assignments = filter(lambda x: isinstance(x, ast.Assign), t.body) for a in assignments: if (len(a.targets) == 1 and - isinstance(a.targets[0], ast.Name) and - a.targets[0].id == "__version__" and - isinstance(a.value, ast.Str)): + isinstance(a.targets[0], ast.Name) and + a.targets[0].id == "__version__" and + isinstance(a.value, ast.Str)): version = a.value.s setup(name='html5lib', diff --git a/utils/entities.py b/utils/entities.py index 116a27cb..6dccf5f0 100644 --- a/utils/entities.py +++ b/utils/entities.py @@ -2,50 +2,59 @@ import html5lib + def parse(path="html5ents.xml"): return html5lib.parse(open(path), treebuilder="lxml") + def entity_table(tree): return dict((entity_name("".join(tr[0].xpath(".//text()"))), entity_characters(tr[1].text)) for tr in tree.xpath("//h:tbody/h:tr", - namespaces={"h":"http://www.w3.org/1999/xhtml"})) + namespaces={"h": "http://www.w3.org/1999/xhtml"})) + def entity_name(inp): return inp.strip() + def entity_characters(inp): return "".join(codepoint_to_character(item) - for item in inp.split() - if item) + for item in inp.split() + if item) + def codepoint_to_character(inp): - return ("\U000"+inp[2:]).decode("unicode-escape") + return ("\\U000" + inp[2:]).decode("unicode-escape") + def make_tests_json(entities): test_list = make_test_list(entities) tests_json = {"tests": - [make_test(*item) for item in test_list] + [make_test(*item) for item in test_list] } return tests_json + def make_test(name, characters, good): return { - "description":test_description(name, good), - "input":"&%s"%name, - "output":test_expected(name, characters, good) - } + "description": test_description(name, good), + "input": "&%s" % name, + "output": test_expected(name, characters, good) + } + def test_description(name, good): with_semicolon = name.endswith(";") - semicolon_text = 
{True:"with a semi-colon", - False:"without a semi-colon"}[with_semicolon] + semicolon_text = {True: "with a semi-colon", + False: "without a semi-colon"}[with_semicolon] if good: - text = "Named entity: %s %s"%(name, semicolon_text) + text = "Named entity: %s %s" % (name, semicolon_text) else: - text = "Bad named entity: %s %s"%(name, semicolon_text) + text = "Bad named entity: %s %s" % (name, semicolon_text) return text + def test_expected(name, characters, good): rv = [] if not good or not name.endswith(";"): @@ -53,6 +62,7 @@ def test_expected(name, characters, good): rv.append(["Character", characters]) return rv + def make_test_list(entities): tests = [] for entity_name, characters in entities.items(): @@ -61,20 +71,23 @@ def make_test_list(entities): tests.append((entity_name, characters, True)) return sorted(tests) + def subentity_exists(entity_name, entities): for i in range(1, len(entity_name)): if entity_name[:-i] in entities: return True return False + def make_entities_code(entities): - entities_text = "\n".join(" \"%s\": u\"%s\","%( - name, entities[name].encode( - "unicode-escape").replace("\"", "\\\"")) - for name in sorted(entities.keys())) + entities_text = "\n".join(" \"%s\": u\"%s\"," % ( + name, entities[name].encode( + "unicode-escape").replace("\"", "\\\"")) + for name in sorted(entities.keys())) return """entities = { %s -}"""%entities_text +}""" % entities_text + def main(): entities = entity_table(parse()) @@ -85,4 +98,3 @@ def main(): if __name__ == "__main__": main() - diff --git a/utils/spider.py b/utils/spider.py index ac5f9fbe..3a325888 100644 --- a/utils/spider.py +++ b/utils/spider.py @@ -7,7 +7,9 @@ s.spider("http://www.google.com", maxURLs=100) """ -import urllib.request, urllib.error, urllib.parse +import urllib.request +import urllib.error +import urllib.parse import urllib.robotparser import md5 @@ -16,11 +18,13 @@ import html5lib from html5lib.treebuilders import etree + class Spider(object): + def __init__(self): 
self.unvisitedURLs = set() self.visitedURLs = set() - self.buggyURLs=set() + self.buggyURLs = set() self.robotParser = urllib.robotparser.RobotFileParser() self.contentDigest = {} self.http = httplib2.Http(".cache") @@ -70,18 +74,18 @@ def updateURLs(self, tree): update the list of visited and unvisited URLs according to whether we have seen them before or not""" urls = set() - #Remove all links we have already visited + # Remove all links we have already visited for link in tree.findall(".//a"): - try: - url = urllib.parse.urldefrag(link.attrib['href'])[0] - if (url and url not in self.unvisitedURLs and url + try: + url = urllib.parse.urldefrag(link.attrib['href'])[0] + if (url and url not in self.unvisitedURLs and url not in self.visitedURLs): - urls.add(url) - except KeyError: - pass + urls.add(url) + except KeyError: + pass - #Remove all non-http URLs and add a suitable base URL where that is - #missing + # Remove all non-http URLs and add a suitable base URL where that is + # missing newUrls = set() for url in urls: splitURL = list(urllib.parse.urlsplit(url)) @@ -93,23 +97,22 @@ def updateURLs(self, tree): urls = newUrls responseHeaders = {} - #Now we want to find the content types of the links we haven't visited + # Now we want to find the content types of the links we haven't visited for url in urls: try: resp, content = self.http.request(url, "HEAD") responseHeaders[url] = resp - except AttributeError as KeyError: - #Don't know why this happens + except AttributeError: + # Don't know why this happens pass - - #Remove links not of content-type html or pages not found - #XXX - need to deal with other status codes? + # Remove links not of content-type html or pages not found + # XXX - need to deal with other status codes? 
toVisit = set([url for url in urls if url in responseHeaders and - "html" in responseHeaders[url]['content-type'] and - responseHeaders[url]['status'] == "200"]) + "html" in responseHeaders[url]['content-type'] and + responseHeaders[url]['status'] == "200"]) - #Now check we are allowed to spider the page + # Now check we are allowed to spider the page for url in toVisit: robotURL = list(urllib.parse.urlsplit(url)[:2]) robotURL.extend(["robots.txt", "", ""]) From de6bcf22e8171e06b0e07558b699075f1b970dd0 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Fri, 20 May 2016 18:12:08 +0100 Subject: [PATCH 05/27] Fix incorrectly hidden flake8 errors --- html5lib/tests/support.py | 10 +++++----- html5lib/tests/test_encoding.py | 6 +++--- html5lib/tests/test_parser2.py | 20 +++++++++----------- html5lib/tests/test_stream.py | 28 ++++++++++++++++++++++++++-- html5lib/tests/test_treeadapters.py | 4 ++-- html5lib/tokenizer.py | 6 +++--- html5lib/treeadapters/__init__.py | 2 +- 7 files changed, 49 insertions(+), 27 deletions(-) diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index 6e6a916b..5f3cc619 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -13,7 +13,7 @@ os.path.pardir, os.path.pardir))) -from html5lib import treebuilders, treewalkers, treeadapters +from html5lib import treebuilders, treewalkers, treeadapters # noqa del base_path # Build a dict of available trees @@ -26,14 +26,14 @@ } # ElementTree impls -import xml.etree.ElementTree as ElementTree +import xml.etree.ElementTree as ElementTree # noqa treeTypes['ElementTree'] = { "builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True), "walker": treewalkers.getTreeWalker("etree", ElementTree) } try: - import xml.etree.cElementTree as cElementTree + import xml.etree.cElementTree as cElementTree # noqa except ImportError: treeTypes['cElementTree'] = None else: @@ -47,7 +47,7 @@ } try: - import lxml.etree as lxml # flake8: noqa + import lxml.etree as lxml # 
noqa except ImportError: treeTypes['lxml'] = None else: @@ -58,7 +58,7 @@ # Genshi impls try: - import genshi # flake8: noqa + import genshi # noqa except ImportError: pass else: diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index 09504654..16dd1189 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -57,13 +57,13 @@ def test_encoding(): try: try: - import charade # flake8: noqa + import charade # noqa except ImportError: - import chardet # flake8: noqa + import chardet # noqa except ImportError: print("charade/chardet not found, skipping chardet tests") else: def test_chardet(): - with open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt"), "rb") as fp: + with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp: encoding = inputstream.HTMLInputStream(fp.read()).charEncoding assert encoding[0].name == "big5" diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 2f3ba2c8..f8e1ac43 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -2,10 +2,8 @@ import io -import pytest +from . import support # noqa -from . 
import support # flake8: noqa -from html5lib import html5parser from html5lib.constants import namespaces from html5lib import parse @@ -23,29 +21,29 @@ def test_line_counter(): def test_namespace_html_elements_0_dom(): doc = parse("", - treebuilder="dom", - namespaceHTMLElements=True) + treebuilder="dom", + namespaceHTMLElements=True) assert doc.childNodes[0].namespaceURI == namespaces["html"] def test_namespace_html_elements_1_dom(): doc = parse("", - treebuilder="dom", - namespaceHTMLElements=False) + treebuilder="dom", + namespaceHTMLElements=False) assert doc.childNodes[0].namespaceURI is None def test_namespace_html_elements_0_etree(): doc = parse("", - treebuilder="etree", - namespaceHTMLElements=True) + treebuilder="etree", + namespaceHTMLElements=True) assert doc.tag == "{%s}html" % (namespaces["html"],) def test_namespace_html_elements_1_etree(): doc = parse("", - treebuilder="etree", - namespaceHTMLElements=False) + treebuilder="etree", + namespaceHTMLElements=False) assert doc.tag == "html" diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index 3b659fbb..a92ee0a3 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -1,9 +1,9 @@ from __future__ import absolute_import, division, unicode_literals -from . import support # flake8: noqa +from . 
import support # noqa + import codecs from io import BytesIO -import socket import six from six.moves import http_client, urllib @@ -11,12 +11,14 @@ from html5lib.inputstream import (BufferedStream, HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream) + def test_basic(): s = b"abc" fp = BufferedStream(BytesIO(s)) read = fp.read(10) assert read == s + def test_read_length(): fp = BufferedStream(BytesIO(b"abcdef")) read1 = fp.read(1) @@ -28,17 +30,23 @@ def test_read_length(): read4 = fp.read(4) assert read4 == b"" + def test_tell(): fp = BufferedStream(BytesIO(b"abcdef")) read1 = fp.read(1) + assert read1 == b"a" assert fp.tell() == 1 read2 = fp.read(2) + assert read2 == b"bc" assert fp.tell() == 3 read3 = fp.read(3) + assert read3 == b"def" assert fp.tell() == 6 read4 = fp.read(4) + assert read4 == b"" assert fp.tell() == 6 + def test_seek(): fp = BufferedStream(BytesIO(b"abcdef")) read1 = fp.read(1) @@ -55,20 +63,26 @@ def test_seek(): read5 = fp.read(2) assert read5 == b"ef" + def test_seek_tell(): fp = BufferedStream(BytesIO(b"abcdef")) read1 = fp.read(1) + assert read1 == b"a" assert fp.tell() == 1 fp.seek(0) read2 = fp.read(1) + assert read2 == b"a" assert fp.tell() == 1 read3 = fp.read(2) + assert read3 == b"bc" assert fp.tell() == 3 fp.seek(2) read4 = fp.read(2) + assert read4 == b"cd" assert fp.tell() == 4 fp.seek(4) read5 = fp.read(2) + assert read5 == b"ef" assert fp.tell() == 6 @@ -85,11 +99,13 @@ def test_char_ascii(): assert stream.charEncoding[0].name == 'windows-1252' assert stream.char() == "'" + def test_char_utf8(): stream = HTMLInputStream('\u2018'.encode('utf-8'), encoding='utf-8') assert stream.charEncoding[0].name == 'utf-8' assert stream.char() == '\u2018' + def test_char_win1252(): stream = HTMLInputStream("\xa9\xf1\u2019".encode('windows-1252')) assert stream.charEncoding[0].name == 'windows-1252' @@ -97,16 +113,19 @@ def test_char_win1252(): assert stream.char() == "\xf1" assert stream.char() == "\u2019" + def test_bom(): stream 
= HTMLInputStream(codecs.BOM_UTF8 + b"'") assert stream.charEncoding[0].name == 'utf-8' assert stream.char() == "'" + def test_utf_16(): stream = HTMLInputStream((' ' * 1025).encode('utf-16')) assert stream.charEncoding[0].name in ['utf-16le', 'utf-16be'] assert len(stream.charsUntil(' ', True)) == 1025 + def test_newlines(): stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe") assert stream.position() == (1, 0) @@ -117,11 +136,13 @@ def test_newlines(): assert stream.charsUntil('e') == "x" assert stream.position() == (4, 5) + def test_newlines2(): size = HTMLUnicodeInputStream._defaultChunkSize stream = HTMLInputStream("\r" * size + "\n") assert stream.charsUntil('x') == "\n" * size + def test_position(): stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh") assert stream.position() == (1, 0) @@ -140,6 +161,7 @@ def test_position(): assert stream.charsUntil('h') == "e\nf\ng" assert stream.position() == (6, 1) + def test_position2(): stream = HTMLUnicodeInputStreamShortChunk("abc\nd") assert stream.position() == (1, 0) @@ -154,6 +176,7 @@ def test_position2(): assert stream.char() == "d" assert stream.position() == (2, 1) + def test_python_issue_20007(): """ Make sure we have a work-around for Python bug #20007 @@ -168,6 +191,7 @@ def makefile(self, _mode, _bufsize=None): stream = HTMLInputStream(source) assert stream.charsUntil(" ") == "Text" + def test_python_issue_20007_b(): """ Make sure we have a work-around for Python bug #20007 diff --git a/html5lib/tests/test_treeadapters.py b/html5lib/tests/test_treeadapters.py index 5f38b6c3..95e56c00 100644 --- a/html5lib/tests/test_treeadapters.py +++ b/html5lib/tests/test_treeadapters.py @@ -1,6 +1,6 @@ from __future__ import absolute_import, division, unicode_literals -from . import support # flake8: noqa +from . 
import support # noqa import html5lib from html5lib.treeadapters import sax @@ -25,7 +25,7 @@ def test_to_sax(): ('endElementNS', ('http://www.w3.org/1999/xhtml', 'title'), 'title'), ('characters', '\n '), ('endElementNS', ('http://www.w3.org/1999/xhtml', 'head'), 'head'), - ('startElementNS', ('http://www.w3.org/1999/xhtml', 'body'), 'body', {}), + ('startElementNS', ('http://www.w3.org/1999/xhtml', 'body'), 'body', {}), ('startElementNS', ('http://www.w3.org/1999/xhtml', 'a'), 'a', {(None, 'href'): '/'}), ('startElementNS', ('http://www.w3.org/1999/xhtml', 'b'), 'b', {}), ('startElementNS', ('http://www.w3.org/1999/xhtml', 'p'), 'p', {}), diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py index 79774578..3d21c32d 100644 --- a/html5lib/tokenizer.py +++ b/html5lib/tokenizer.py @@ -1,7 +1,7 @@ from __future__ import absolute_import, division, unicode_literals try: - chr = unichr # flake8: noqa + chr = unichr # noqa except NameError: pass @@ -147,8 +147,8 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False): output = "&" charStack = [self.stream.char()] - if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") - or (allowedChar is not None and allowedChar == charStack[0])): + if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or + (allowedChar is not None and allowedChar == charStack[0])): self.stream.unget(charStack[0]) elif charStack[0] == "#": diff --git a/html5lib/treeadapters/__init__.py b/html5lib/treeadapters/__init__.py index 57d71304..4f978466 100644 --- a/html5lib/treeadapters/__init__.py +++ b/html5lib/treeadapters/__init__.py @@ -5,7 +5,7 @@ __all__ = ["sax"] try: - from . import genshi # flake8: noqa + from . 
import genshi # noqa except ImportError: pass else: From 0bd31c4251889a4216b8ac3a59e5833534643e48 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Fri, 20 May 2016 18:58:42 +0100 Subject: [PATCH 06/27] Get rid of type()-based type-check --- html5lib/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/html5lib/utils.py b/html5lib/utils.py index c70de172..c83a089f 100644 --- a/html5lib/utils.py +++ b/html5lib/utils.py @@ -52,7 +52,7 @@ def __init__(self, items=()): # anything here. _dictEntries = [] for name, value in items: - if type(name) in (list, tuple, frozenset, set): + if isinstance(name, (list, tuple, frozenset, set)): for item in name: _dictEntries.append((item, value)) else: From d440a830fb75beafed838327c21e9a8a773c9743 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Fri, 20 May 2016 19:16:57 +0100 Subject: [PATCH 07/27] Silence pytest unused-variable warnings --- html5lib/ihatexml.py | 2 +- html5lib/inputstream.py | 4 ++-- html5lib/serializer/htmlserializer.py | 2 +- html5lib/tests/test_encoding.py | 2 +- html5lib/tests/test_serializer.py | 2 +- html5lib/tests/test_treewalkers.py | 2 +- html5lib/tokenizer.py | 4 ++-- html5lib/treebuilders/dom.py | 2 +- html5lib/treebuilders/etree.py | 4 ++-- html5lib/treewalkers/etree.py | 4 ++-- html5lib/treewalkers/genshistream.py | 2 +- 11 files changed, 15 insertions(+), 15 deletions(-) diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py index 57fec9d6..738d2457 100644 --- a/html5lib/ihatexml.py +++ b/html5lib/ihatexml.py @@ -232,7 +232,7 @@ def coerceComment(self, data): def coerceCharacters(self, data): if self.replaceFormFeedCharacters: - for i in range(data.count("\x0C")): + for _ in range(data.count("\x0C")): warnings.warn("Text cannot contain U+000C", DataLossWarning) data = data.replace("\x0C", " ") # Other non-xml characters diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py index 5cfc2cc5..4231ae19 100644 --- a/html5lib/inputstream.py +++ 
b/html5lib/inputstream.py @@ -296,7 +296,7 @@ def readChunk(self, chunkSize=None): return True def characterErrorsUCS4(self, data): - for i in range(len(invalid_unicode_re.findall(data))): + for _ in range(len(invalid_unicode_re.findall(data))): self.errors.append("invalid-codepoint") def characterErrorsUCS2(self, data): @@ -681,7 +681,7 @@ def getEncoding(self): (b" 0: - for i in range(nullCount): + for _ in range(nullCount): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) data = data.replace("\u0000", "\uFFFD") diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py index 8656244f..27432c7a 100644 --- a/html5lib/treebuilders/dom.py +++ b/html5lib/treebuilders/dom.py @@ -109,7 +109,7 @@ def getNameTuple(self): nameTuple = property(getNameTuple) - class TreeBuilder(_base.TreeBuilder): + class TreeBuilder(_base.TreeBuilder): # pylint:disable=unused-variable def documentClass(self): self.dom = Dom.getDOMImplementation().createDocument(None, None, None) return weakref.proxy(self) diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py index 2c8ed19f..b607948b 100644 --- a/html5lib/treebuilders/etree.py +++ b/html5lib/treebuilders/etree.py @@ -253,7 +253,7 @@ def serializeElement(element, indent=0): return "\n".join(rv) - def tostring(element): + def tostring(element): # pylint:disable=unused-variable """Serialize an element and its child nodes to a string""" rv = [] filter = ihatexml.InfosetFilter() @@ -307,7 +307,7 @@ def serializeElement(element): return "".join(rv) - class TreeBuilder(_base.TreeBuilder): + class TreeBuilder(_base.TreeBuilder): # pylint:disable=unused-variable documentClass = Document doctypeClass = DocumentType elementClass = Element diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index 73c8e26a..d3b0c50e 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -22,7 +22,7 @@ def 
getETreeBuilder(ElementTreeImplementation): ElementTree = ElementTreeImplementation ElementTreeCommentType = ElementTree.Comment("asd").tag - class TreeWalker(_base.NonRecursiveTreeWalker): + class TreeWalker(_base.NonRecursiveTreeWalker): # pylint:disable=unused-variable """Given the particular ElementTree representation, this implementation, to avoid using recursion, returns "nodes" as tuples with the following content: @@ -38,7 +38,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker): """ def getNodeDetails(self, node): if isinstance(node, tuple): # It might be the root Element - elt, key, parents, flag = node + elt, _, _, flag = node if flag in ("text", "tail"): return _base.TEXT, getattr(elt, flag) else: diff --git a/html5lib/treewalkers/genshistream.py b/html5lib/treewalkers/genshistream.py index 83cd1654..61cbfede 100644 --- a/html5lib/treewalkers/genshistream.py +++ b/html5lib/treewalkers/genshistream.py @@ -25,7 +25,7 @@ def __iter__(self): yield token def tokens(self, event, next): - kind, data, pos = event + kind, data, _ = event if kind == START: tag, attribs = data name = tag.localname From 5c1d8e2743383b3875ef840cc0ab842dbc1e7618 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Fri, 20 May 2016 19:21:57 +0100 Subject: [PATCH 08/27] Remove duplicate entry from constants.replacementCharacters --- html5lib/constants.py | 1 - 1 file changed, 1 deletion(-) diff --git a/html5lib/constants.py b/html5lib/constants.py index 2244933c..df1f061e 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -2819,7 +2819,6 @@ 0x0d: "\u000D", 0x80: "\u20AC", 0x81: "\u0081", - 0x81: "\u0081", 0x82: "\u201A", 0x83: "\u0192", 0x84: "\u201E", From 1b86ccbeec08069d1a40cd22d0dcc8492bdd789a Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Fri, 20 May 2016 19:23:44 +0100 Subject: [PATCH 09/27] Remove gratuitous argument in sanitizer --- html5lib/filters/sanitizer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git
a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index fdd4181d..7f81c0d1 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -765,15 +765,15 @@ def sanitize_token(self, token): if ((namespace, name) in self.allowed_elements or (namespace is None and (namespaces["html"], name) in self.allowed_elements)): - return self.allowed_token(token, token_type) + return self.allowed_token(token) else: - return self.disallowed_token(token, token_type) + return self.disallowed_token(token) elif token_type == "Comment": pass else: return token - def allowed_token(self, token, token_type): + def allowed_token(self, token): if "data" in token: attrs = token["data"] attr_names = set(attrs.keys()) @@ -823,7 +823,8 @@ def allowed_token(self, token, token_type): token["data"] = attrs return token - def disallowed_token(self, token, token_type): + def disallowed_token(self, token): + token_type = token["type"] if token_type == "EndTag": token["data"] = "</%s>" % token["name"] elif token["data"]: From 82d623bc8287d00db13ca98bf9e6d7a1921c6a56 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Fri, 20 May 2016 19:39:29 +0100 Subject: [PATCH 10/27] Silence redefined-variable-type --- html5lib/html5parser.py | 2 +- html5lib/tokenizer.py | 2 +- html5lib/treewalkers/lxmletree.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index aad6a059..86b3e609 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -121,7 +121,7 @@ def reset(self): self.phase.insertHtmlElement() self.resetInsertionMode() else: - self.innerHTML = False + self.innerHTML = False # pylint:disable=redefined-variable-type self.phase = self.phases["initial"] self.lastPhase = None diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py index ef7a7b1f..50e505a9 100644 --- a/html5lib/tokenizer.py +++ b/html5lib/tokenizer.py @@ -1716,7 +1716,7 @@ def cdataSectionState(self): else: data.append(char) -
data = "".join(data) + data = "".join(data) # pylint:disable=redefined-variable-type # Deal with null here rather than in the parser nullCount = data.count("\u0000") if nullCount > 0: diff --git a/html5lib/treewalkers/lxmletree.py b/html5lib/treewalkers/lxmletree.py index 36850086..7d99adc2 100644 --- a/html5lib/treewalkers/lxmletree.py +++ b/html5lib/treewalkers/lxmletree.py @@ -117,6 +117,7 @@ def __len__(self): class TreeWalker(_base.NonRecursiveTreeWalker): def __init__(self, tree): + # pylint:disable=redefined-variable-type if hasattr(tree, "getroot"): self.fragmentChildren = set() tree = Root(tree) From a017b8881f42b2ab21a2f47af993ba6d58b25ca2 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Fri, 20 May 2016 19:59:08 +0100 Subject: [PATCH 11/27] Silence unused-argument --- html5lib/html5parser.py | 2 ++ html5lib/ihatexml.py | 4 ++-- html5lib/inputstream.py | 1 + html5lib/serializer/htmlserializer.py | 2 +- html5lib/tests/test_sanitizer.py | 2 +- html5lib/tests/test_stream.py | 2 ++ html5lib/tests/tokenizer.py | 1 + html5lib/treebuilders/etree_lxml.py | 2 ++ 8 files changed, 12 insertions(+), 4 deletions(-) diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 86b3e609..66ad7430 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -241,6 +241,7 @@ def parse(self, stream, encoding=None, parseMeta=True, def parseFragment(self, stream, container="div", encoding=None, parseMeta=False, useChardet=True, scripting=False): + # pylint:disable=unused-argument """Parse a HTML fragment into a well-formed tree fragment container - name of the element we're setting the innerHTML property @@ -363,6 +364,7 @@ def adjustForeignAttributes(self, token): del token["data"][originalName] def reparseTokenNormal(self, token): + # pylint:disable=unused-argument self.parser.phase() def resetInsertionMode(self): diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py index 738d2457..d6d1d6fb 100644 --- a/html5lib/ihatexml.py +++ 
b/html5lib/ihatexml.py @@ -186,7 +186,7 @@ def escapeRegexp(string): class InfosetFilter(object): replacementRegexp = re.compile(r"U[\dA-F]{5,5}") - def __init__(self, replaceChars=None, + def __init__(self, dropXmlnsLocalName=False, dropXmlnsAttrNs=False, preventDoubleDashComments=False, @@ -217,7 +217,7 @@ def coerceAttribute(self, name, namespace=None): else: return self.toXmlName(name) - def coerceElement(self, name, namespace=None): + def coerceElement(self, name): return self.toXmlName(name) def coerceComment(self, data): diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py index 4231ae19..a9aa2a15 100644 --- a/html5lib/inputstream.py +++ b/html5lib/inputstream.py @@ -571,6 +571,7 @@ def __new__(self, value): return bytes.__new__(self, value.lower()) def __init__(self, value): + # pylint:disable=unused-argument self._position = -1 def __iter__(self): diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer/htmlserializer.py index d66ff36c..641d8c1c 100644 --- a/html5lib/serializer/htmlserializer.py +++ b/html5lib/serializer/htmlserializer.py @@ -328,6 +328,6 @@ def serializeError(self, data="XXX ERROR MESSAGE NEEDED"): raise SerializeError -def SerializeError(Exception): +class SerializeError(Exception): """Error in serialized tree""" pass diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py index 9f8ae22c..e19deea8 100644 --- a/html5lib/tests/test_sanitizer.py +++ b/html5lib/tests/test_sanitizer.py @@ -4,7 +4,7 @@ from html5lib.filters import sanitizer -def runSanitizerTest(name, expected, input): +def runSanitizerTest(_, expected, input): parsed = parseFragment(expected) expected = serialize(parsed, omit_optional_tags=False, diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index a92ee0a3..835e32e5 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -184,6 +184,7 @@ def test_python_issue_20007(): """ class FakeSocket(object): def makefile(self, 
_mode, _bufsize=None): + # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") source = http_client.HTTPResponse(FakeSocket()) @@ -202,6 +203,7 @@ def test_python_issue_20007_b(): class FakeSocket(object): def makefile(self, _mode, _bufsize=None): + # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") source = http_client.HTTPResponse(FakeSocket()) diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py index c6163a1f..255c1859 100644 --- a/html5lib/tests/tokenizer.py +++ b/html5lib/tests/tokenizer.py @@ -19,6 +19,7 @@ def __init__(self, initialState, lastStartTag=None): self._lastStartTag = lastStartTag def parse(self, stream, encoding=None, innerHTML=False): + # pylint:disable=unused-argument tokenizer = self.tokenizer(stream, encoding) self.outputTokens = [] diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index 79a4d4c5..a92b3aa9 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -305,6 +305,8 @@ def insertDoctype(self, token): self.doctype = doctype def insertCommentInitial(self, data, parent=None): + assert parent is None or parent is self.document + assert self.document._elementTree is None self.initial_comments.append(data) def insertCommentMain(self, data, parent=None): From e5d395c28c7357ace9352fd162f8efe1d8ac8143 Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: Fri, 20 May 2016 20:25:29 +0100 Subject: [PATCH 12/27] Silence wrong-import-position --- html5lib/serializer/htmlserializer.py | 103 ++++++++++++-------------- html5lib/tests/support.py | 4 + html5lib/tests/test_encoding.py | 2 + html5lib/tests/test_serializer.py | 2 + html5lib/tokenizer.py | 5 +- html5lib/trie/__init__.py | 2 + 6 files changed, 60 insertions(+), 58 deletions(-) diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer/htmlserializer.py index 641d8c1c..be2718d3 100644 --- a/html5lib/serializer/htmlserializer.py +++ 
b/html5lib/serializer/htmlserializer.py @@ -3,6 +3,8 @@ import re +from codecs import register_error, xmlcharrefreplace_errors + from ..constants import voidElements, booleanAttributes, spaceCharacters from ..constants import rcdataElements, entities, xmlEntities from .. import utils @@ -21,61 +23,54 @@ "\u2008\u2009\u200a\u2028\u2029\u202f\u205f" "\u3000]") -try: - from codecs import register_error, xmlcharrefreplace_errors -except ImportError: - unicode_encode_errors = "strict" -else: - unicode_encode_errors = "htmlentityreplace" - - encode_entity_map = {} - is_ucs4 = len("\U0010FFFF") == 1 - for k, v in list(entities.items()): - # skip multi-character entities - if ((is_ucs4 and len(v) > 1) or - (not is_ucs4 and len(v) > 2)): - continue - if v != "&": - if len(v) == 2: - v = utils.surrogatePairToCodepoint(v) - else: - v = ord(v) - if v not in encode_entity_map or k.islower(): - # prefer < over < and similarly for &, >, etc. - encode_entity_map[v] = k - - def htmlentityreplace_errors(exc): - if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)): - res = [] - codepoints = [] - skip = False - for i, c in enumerate(exc.object[exc.start:exc.end]): - if skip: - skip = False - continue - index = i + exc.start - if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]): - codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2]) - skip = True - else: - codepoint = ord(c) - codepoints.append(codepoint) - for cp in codepoints: - e = encode_entity_map.get(cp) - if e: - res.append("&") - res.append(e) - if not e.endswith(";"): - res.append(";") - else: - res.append("&#x%s;" % (hex(cp)[2:])) - return ("".join(res), exc.end) - else: - return xmlcharrefreplace_errors(exc) - register_error(unicode_encode_errors, htmlentityreplace_errors) +encode_entity_map = {} +is_ucs4 = len("\U0010FFFF") == 1 +for k, v in list(entities.items()): + # skip multi-character entities + if ((is_ucs4 and len(v) > 1) or + (not is_ucs4 and len(v) > 2)): + continue + 
if v != "&": + if len(v) == 2: + v = utils.surrogatePairToCodepoint(v) + else: + v = ord(v) + if v not in encode_entity_map or k.islower(): + # prefer < over < and similarly for &, >, etc. + encode_entity_map[v] = k + + +def htmlentityreplace_errors(exc): + if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)): + res = [] + codepoints = [] + skip = False + for i, c in enumerate(exc.object[exc.start:exc.end]): + if skip: + skip = False + continue + index = i + exc.start + if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]): + codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2]) + skip = True + else: + codepoint = ord(c) + codepoints.append(codepoint) + for cp in codepoints: + e = encode_entity_map.get(cp) + if e: + res.append("&") + res.append(e) + if not e.endswith(";"): + res.append(";") + else: + res.append("&#x%s;" % (hex(cp)[2:])) + return ("".join(res), exc.end) + else: + return xmlcharrefreplace_errors(exc) - del register_error +register_error("htmlentityreplace", htmlentityreplace_errors) class HTMLSerializer(object): @@ -168,7 +163,7 @@ def __init__(self, **kwargs): def encode(self, string): assert(isinstance(string, text_type)) if self.encoding: - return string.encode(self.encoding, unicode_encode_errors) + return string.encode(self.encoding, "htmlentityreplace") else: return string diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index 5f3cc619..6ae09dbe 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -1,5 +1,7 @@ from __future__ import absolute_import, division, unicode_literals +# pylint:disable=wrong-import-position + import os import sys import codecs @@ -68,6 +70,8 @@ "walker": treewalkers.getTreeWalker("genshi") } +# pylint:enable=wrong-import-position + def get_data_files(subdirectory, files='*.dat', search_dir=test_dir): return sorted(glob.glob(os.path.join(search_dir, subdirectory, files))) diff --git a/html5lib/tests/test_encoding.py 
b/html5lib/tests/test_encoding.py index c380957c..c5d2af12 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -55,6 +55,7 @@ def test_encoding(): yield (runParserEncodingTest, test[b'data'], test[b'encoding']) yield (runPreScanEncodingTest, test[b'data'], test[b'encoding']) +# pylint:disable=wrong-import-position try: try: import charade # noqa @@ -67,3 +68,4 @@ def test_chardet(): with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp: encoding = inputstream.HTMLInputStream(fp.read()).charEncoding assert encoding[0].name == "big5" +# pylint:enable=wrong-import-position diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index d2e3a48a..b3cda7d7 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -12,6 +12,7 @@ from html5lib.serializer import HTMLSerializer, serialize from html5lib.treewalkers._base import TreeWalker +# pylint:disable=wrong-import-position optionals_loaded = [] try: @@ -19,6 +20,7 @@ optionals_loaded.append("lxml") except ImportError: pass +# pylint:enable=wrong-import-position default_namespace = constants.namespaces["html"] diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py index 50e505a9..dd6ea75f 100644 --- a/html5lib/tokenizer.py +++ b/html5lib/tokenizer.py @@ -1,9 +1,6 @@ from __future__ import absolute_import, division, unicode_literals -try: - chr = unichr # noqa -except NameError: - pass +from six import unichr as chr from collections import deque diff --git a/html5lib/trie/__init__.py b/html5lib/trie/__init__.py index a8cca8a9..a5ba4bf1 100644 --- a/html5lib/trie/__init__.py +++ b/html5lib/trie/__init__.py @@ -4,9 +4,11 @@ Trie = PyTrie +# pylint:disable=wrong-import-position try: from .datrie import Trie as DATrie except ImportError: pass else: Trie = DATrie +# pylint:enable=wrong-import-position From b64df28cfb9e721ec3450e514ef8866001314eec Mon Sep 17 00:00:00 2001 From: Geoffrey Sneddon Date: 
Fri, 20 May 2016 20:29:15 +0100 Subject: [PATCH 13/27] Change which way around we overwrite this for clarity's sake --- html5lib/html5parser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 66ad7430..331b8fd7 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -953,8 +953,8 @@ class InBodyPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - # Keep a ref to this for special handling of whitespace in <pre>
-            self.processSpaceCharactersNonPre = self.processSpaceCharacters
+            # Set this to the default handler
+            self.processSpaceCharacters = self.processSpaceCharactersNonPre
 
             self.startTagHandler = utils.MethodDispatcher([
                 ("html", self.startTagHtml),
@@ -1087,7 +1087,7 @@ def processCharacters(self, token):
                      for char in token["data"]])):
                 self.parser.framesetOK = False
 
-        def processSpaceCharacters(self, token):
+        def processSpaceCharactersNonPre(self, token):
             self.tree.reconstructActiveFormattingElements()
             self.tree.insertText(token["data"])
 

From df0b2ba4ddb78384e0b35be9f31a3848f21a2464 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 20 May 2016 20:33:43 +0100
Subject: [PATCH 14/27] Remove unused import

---
 html5lib/inputstream.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py
index a9aa2a15..b43c2bda 100644
--- a/html5lib/inputstream.py
+++ b/html5lib/inputstream.py
@@ -19,12 +19,6 @@
 except ImportError:
     BytesIO = StringIO
 
-try:
-    from io import BufferedIOBase
-except ImportError:
-    class BufferedIOBase(object):
-        pass
-
 # Non-unicode versions of constants for use in the pre-parser
 spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
 asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])

From 742715d5948456adc6bff21fce88e6b0858364d6 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 20 May 2016 22:18:52 +0100
Subject: [PATCH 15/27] Fix invalid_unicode_re on platforms supporting lone
 surrogates

---
 html5lib/inputstream.py       |   6 +-
 html5lib/tests/test_stream.py | 112 +++++++++++++++++++++++++++++++++-
 2 files changed, 115 insertions(+), 3 deletions(-)

diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py
index b43c2bda..e63e1215 100644
--- a/html5lib/inputstream.py
+++ b/html5lib/inputstream.py
@@ -33,8 +33,10 @@
     # unichr. Not using this indirection would introduce an illegal
     # unicode literal on platforms not supporting such lone
     # surrogates.
-    invalid_unicode_re = re.compile(invalid_unicode_no_surrogate +
-                                    eval('"\\uD800-\\uDFFF"'))
+    assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
+    invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
+                                    eval('"\\uD800-\\uDFFF"') +
+                                    "]")
 else:
     invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
 
diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
index 835e32e5..4e8453df 100644
--- a/html5lib/tests/test_stream.py
+++ b/html5lib/tests/test_stream.py
@@ -3,13 +3,17 @@
 from . import support  # noqa
 
 import codecs
-from io import BytesIO
+import sys
+from io import BytesIO, StringIO
+
+import pytest
 
 import six
 from six.moves import http_client, urllib
 
 from html5lib.inputstream import (BufferedStream, HTMLInputStream,
                                   HTMLUnicodeInputStream, HTMLBinaryInputStream)
+from html5lib.utils import supports_lone_surrogates
 
 
 def test_basic():
@@ -211,3 +215,109 @@ def makefile(self, _mode, _bufsize=None):
     wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com")
     stream = HTMLInputStream(wrapped)
     assert stream.charsUntil(" ") == "Text"
+
+
+@pytest.mark.parametrize("inp,num",
+                         [("\u0000", 0),
+                          ("\u0001", 1),
+                          ("\u0008", 1),
+                          ("\u0009", 0),
+                          ("\u000A", 0),
+                          ("\u000B", 1),
+                          ("\u000C", 0),
+                          ("\u000D", 0),
+                          ("\u000E", 1),
+                          ("\u001F", 1),
+                          ("\u0020", 0),
+                          ("\u007E", 0),
+                          ("\u007F", 1),
+                          ("\u009F", 1),
+                          ("\u00A0", 0),
+                          ("\uFDCF", 0),
+                          ("\uFDD0", 1),
+                          ("\uFDEF", 1),
+                          ("\uFDF0", 0),
+                          ("\uFFFD", 0),
+                          ("\uFFFE", 1),
+                          ("\uFFFF", 1),
+                          ("\U0001FFFD", 0),
+                          ("\U0001FFFE", 1),
+                          ("\U0001FFFF", 1),
+                          ("\U0002FFFD", 0),
+                          ("\U0002FFFE", 1),
+                          ("\U0002FFFF", 1),
+                          ("\U0003FFFD", 0),
+                          ("\U0003FFFE", 1),
+                          ("\U0003FFFF", 1),
+                          ("\U0004FFFD", 0),
+                          ("\U0004FFFE", 1),
+                          ("\U0004FFFF", 1),
+                          ("\U0005FFFD", 0),
+                          ("\U0005FFFE", 1),
+                          ("\U0005FFFF", 1),
+                          ("\U0006FFFD", 0),
+                          ("\U0006FFFE", 1),
+                          ("\U0006FFFF", 1),
+                          ("\U0007FFFD", 0),
+                          ("\U0007FFFE", 1),
+                          ("\U0007FFFF", 1),
+                          ("\U0008FFFD", 0),
+                          ("\U0008FFFE", 1),
+                          ("\U0008FFFF", 1),
+                          ("\U0009FFFD", 0),
+                          ("\U0009FFFE", 1),
+                          ("\U0009FFFF", 1),
+                          ("\U000AFFFD", 0),
+                          ("\U000AFFFE", 1),
+                          ("\U000AFFFF", 1),
+                          ("\U000BFFFD", 0),
+                          ("\U000BFFFE", 1),
+                          ("\U000BFFFF", 1),
+                          ("\U000CFFFD", 0),
+                          ("\U000CFFFE", 1),
+                          ("\U000CFFFF", 1),
+                          ("\U000DFFFD", 0),
+                          ("\U000DFFFE", 1),
+                          ("\U000DFFFF", 1),
+                          ("\U000EFFFD", 0),
+                          ("\U000EFFFE", 1),
+                          ("\U000EFFFF", 1),
+                          ("\U000FFFFD", 0),
+                          ("\U000FFFFE", 1),
+                          ("\U000FFFFF", 1),
+                          ("\U0010FFFD", 0),
+                          ("\U0010FFFE", 1),
+                          ("\U0010FFFF", 1),
+                          ("\x01\x01\x01", 3),
+                          ("a\x01a\x01a\x01a", 3)])
+def test_invalid_codepoints(inp, num):
+    stream = HTMLUnicodeInputStream(StringIO(inp))
+    for _i in range(len(inp)):
+        stream.char()
+    assert len(stream.errors) == num
+
+
+@pytest.mark.skipif(not supports_lone_surrogates, reason="doesn't support lone surrogates")
+@pytest.mark.parametrize("inp,num",
+                         [("'\\uD7FF'", 0),
+                          ("'\\uD800'", 1),
+                          ("'\\uDBFF'", 1),
+                          ("'\\uDC00'", 1),
+                          ("'\\uDFFF'", 1),
+                          ("'\\uE000'", 0),
+                          ("'\\uD800\\uD800\\uD800'", 3),
+                          ("'a\\uD800a\\uD800a\\uD800a'", 3),
+                          ("'\\uDFFF\\uDBFF'", 2),
+                          pytest.mark.skipif(sys.maxunicode == 0xFFFF,
+                                             ("'\\uDBFF\\uDFFF'", 2),
+                                             reason="narrow Python")])
+def test_invalid_codepoints_surrogates(inp, num):
+    inp = eval(inp)
+    fp = StringIO(inp)
+    if ord(max(fp.read())) > 0xFFFF:
+        pytest.skip("StringIO altered string")
+    fp.seek(0)
+    stream = HTMLUnicodeInputStream(fp)
+    for _i in range(len(inp)):
+        stream.char()
+    assert len(stream.errors) == num

From cd74ec7a49943ab858fc120c19642e6181b58667 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 20 May 2016 22:19:31 +0100
Subject: [PATCH 16/27] Fix comment: surrogates are created with eval, not unichr

---
 html5lib/inputstream.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py
index e63e1215..bb240015 100644
--- a/html5lib/inputstream.py
+++ b/html5lib/inputstream.py
@@ -30,7 +30,7 @@
 
 if utils.supports_lone_surrogates:
     # Use one extra step of indirection and create surrogates with
-    # unichr. Not using this indirection would introduce an illegal
+    # eval. Not using this indirection would introduce an illegal
     # unicode literal on platforms not supporting such lone
     # surrogates.
     assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1

From 15e126fcba9948779f662d5382e5665f6355e629 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 20 May 2016 22:27:49 +0100
Subject: [PATCH 17/27] Silence eval-used

---
 html5lib/inputstream.py       | 2 +-
 html5lib/tests/test_stream.py | 2 +-
 html5lib/utils.py             | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py
index bb240015..1ed277ca 100644
--- a/html5lib/inputstream.py
+++ b/html5lib/inputstream.py
@@ -35,7 +35,7 @@
     # surrogates.
     assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
     invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
-                                    eval('"\\uD800-\\uDFFF"') +
+                                    eval('"\\uD800-\\uDFFF"') +  # pylint:disable=eval-used
                                     "]")
 else:
     invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
index 4e8453df..77e411d5 100644
--- a/html5lib/tests/test_stream.py
+++ b/html5lib/tests/test_stream.py
@@ -312,7 +312,7 @@ def test_invalid_codepoints(inp, num):
                                              ("'\\uDBFF\\uDFFF'", 2),
                                              reason="narrow Python")])
 def test_invalid_codepoints_surrogates(inp, num):
-    inp = eval(inp)
+    inp = eval(inp)  # pylint:disable=eval-used
     fp = StringIO(inp)
     if ord(max(fp.read())) > 0xFFFF:
         pytest.skip("StringIO altered string")
diff --git a/html5lib/utils.py b/html5lib/utils.py
index c83a089f..f27ca73a 100644
--- a/html5lib/utils.py
+++ b/html5lib/utils.py
@@ -22,10 +22,10 @@
 # surrogates, and there is no mechanism to further escape such
 # escapes.
 try:
-    _x = eval('"\\uD800"')
+    _x = eval('"\\uD800"')  # pylint:disable=eval-used
     if not isinstance(_x, text_type):
         # We need this with u"" because of http://bugs.jython.org/issue2039
-        _x = eval('u"\\uD800"')
+        _x = eval('u"\\uD800"')  # pylint:disable=eval-used
         assert isinstance(_x, text_type)
 except:
     supports_lone_surrogates = False

From bfc278ae93cbe56e619d3fc3e0a82f9346584104 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 20 May 2016 22:45:00 +0100
Subject: [PATCH 18/27] Silence bare-except

---
 html5lib/inputstream.py | 2 +-
 html5lib/utils.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py
index 1ed277ca..58d626c9 100644
--- a/html5lib/inputstream.py
+++ b/html5lib/inputstream.py
@@ -449,7 +449,7 @@ def openStream(self, source):
 
         try:
             stream.seek(stream.tell())
-        except:
+        except:  # pylint:disable=bare-except
             stream = BufferedStream(stream)
 
         return stream
diff --git a/html5lib/utils.py b/html5lib/utils.py
index f27ca73a..5fe237a0 100644
--- a/html5lib/utils.py
+++ b/html5lib/utils.py
@@ -27,7 +27,7 @@
         # We need this with u"" because of http://bugs.jython.org/issue2039
         _x = eval('u"\\uD800"')  # pylint:disable=eval-used
         assert isinstance(_x, text_type)
-except:
+except:  # pylint:disable=bare-except
     supports_lone_surrogates = False
 else:
     supports_lone_surrogates = True

From b46fcdf6faf27bdfc99c47b3c2b9129606c02728 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 20 May 2016 22:48:21 +0100
Subject: [PATCH 19/27] Silence too-many-nested-blocks

---
 html5lib/serializer/htmlserializer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer/htmlserializer.py
index be2718d3..8a9439df 100644
--- a/html5lib/serializer/htmlserializer.py
+++ b/html5lib/serializer/htmlserializer.py
@@ -175,6 +175,7 @@ def encodeStrict(self, string):
             return string
 
     def serialize(self, treewalker, encoding=None):
+        # pylint:disable=too-many-nested-blocks
         self.encoding = encoding
         in_cdata = False
         self.errors = []

From 6945bc480d1813f4cfccf135d7f38aadaaad8161 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 20 May 2016 22:48:31 +0100
Subject: [PATCH 20/27] Silence not-callable

---
 html5lib/treebuilders/_base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/html5lib/treebuilders/_base.py b/html5lib/treebuilders/_base.py
index 8196f591..900a724c 100644
--- a/html5lib/treebuilders/_base.py
+++ b/html5lib/treebuilders/_base.py
@@ -126,6 +126,7 @@ class TreeBuilder(object):
     commentClass - the class to use for comments
     doctypeClass - the class to use for doctypes
     """
+    # pylint:disable=not-callable
 
     # Document class
     documentClass = None

From 0c290e06f8eb34786b1c0b6acd14ed1f555ae27f Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 20 May 2016 22:50:45 +0100
Subject: [PATCH 21/27] Kill long-dead finalText code

---
 html5lib/treebuilders/etree_lxml.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index a92b3aa9..71285b68 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -53,7 +53,6 @@ def _getChildNodes(self):
 
 def testSerializer(element):
     rv = []
-    finalText = None
     infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
 
     def serializeElement(element, indent=0):
@@ -128,16 +127,12 @@ def serializeElement(element, indent=0):
                 rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
     serializeElement(element, 0)
 
-    if finalText is not None:
-        rv.append("|%s\"%s\"" % (' ' * 2, finalText))
-
     return "\n".join(rv)
 
 
 def tostring(element):
     """Serialize an element and its child nodes to a string"""
     rv = []
-    finalText = None
 
     def serializeElement(element):
         if not hasattr(element, "tag"):
@@ -173,9 +168,6 @@ def serializeElement(element):
 
     serializeElement(element)
 
-    if finalText is not None:
-        rv.append("%s\"" % (' ' * 2, finalText))
-
     return "".join(rv)
 
 

From da099dce1bb72428336e643f54ff1a8934f9804d Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 20 May 2016 22:59:19 +0100
Subject: [PATCH 22/27] Silence a false-positive non-parent-init-called warning

---
 html5lib/treebuilders/etree_lxml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 71285b68..09d85039 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -189,7 +189,7 @@ def __init__(self, element, value=None):
                 if value is None:
                     value = {}
                 self._element = element
-                dict.__init__(self, value)
+                dict.__init__(self, value)  # pylint:disable=non-parent-init-called
                 for key, value in self.items():
                     if isinstance(key, tuple):
                         name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))

From 97427de90dd2a9ebf12cc1e36858eea931deab60 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 20 May 2016 23:00:36 +0100
Subject: [PATCH 23/27] Fix indentation

---
 html5lib/treebuilders/etree_lxml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 09d85039..abcd4b1d 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -304,7 +304,7 @@ def insertCommentInitial(self, data, parent=None):
     def insertCommentMain(self, data, parent=None):
         if (parent == self.document and
                 self.document._elementTree.getroot()[-1].tag == comment_type):
-                warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
+            warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
         super(TreeBuilder, self).insertComment(data, parent)
 
     def insertRoot(self, token):

From 2afe09bcbcc728e98ec8da39b68ea65f4c270fdb Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 20 May 2016 23:05:48 +0100
Subject: [PATCH 24/27] Make this (in practice unreachable) code work on Py2

---
 html5lib/trie/_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/html5lib/trie/_base.py b/html5lib/trie/_base.py
index 724486b1..be6cb6e3 100644
--- a/html5lib/trie/_base.py
+++ b/html5lib/trie/_base.py
@@ -7,7 +7,7 @@ class Trie(Mapping):
     """Abstract base class for tries"""
 
     def keys(self, prefix=None):
-        keys = super().keys()
+        keys = super(Trie, self).keys()
 
         if prefix is None:
             return set(keys)

From c0df867ebdeda6adc6dca9ff796eccf64e3ebda0 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 20 May 2016 23:07:38 +0100
Subject: [PATCH 25/27] Silence arguments-differ

---
 html5lib/trie/_base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/html5lib/trie/_base.py b/html5lib/trie/_base.py
index be6cb6e3..25eece46 100644
--- a/html5lib/trie/_base.py
+++ b/html5lib/trie/_base.py
@@ -7,6 +7,7 @@ class Trie(Mapping):
     """Abstract base class for tries"""
 
     def keys(self, prefix=None):
+        # pylint:disable=arguments-differ
         keys = super(Trie, self).keys()
 
         if prefix is None:

From 5dce4f27289090ed4662aee8881782a2efbcd20c Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 20 May 2016 23:19:55 +0100
Subject: [PATCH 26/27] Silence protected-access

---
 html5lib/treebuilders/dom.py        | 1 +
 html5lib/treebuilders/etree.py      | 2 ++
 html5lib/treebuilders/etree_lxml.py | 1 +
 3 files changed, 4 insertions(+)

diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
index 27432c7a..b7df74b2 100644
--- a/html5lib/treebuilders/dom.py
+++ b/html5lib/treebuilders/dom.py
@@ -158,6 +158,7 @@ def insertText(self, data, parent=None):
             else:
                 # HACK: allow text nodes as children of the document node
                 if hasattr(self.dom, '_child_node_types'):
+                    # pylint:disable=protected-access
                     if Node.TEXT_NODE not in self.dom._child_node_types:
                         self.dom._child_node_types = list(self.dom._child_node_types)
                         self.dom._child_node_types.append(Node.TEXT_NODE)
diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
index b607948b..d394148d 100644
--- a/html5lib/treebuilders/etree.py
+++ b/html5lib/treebuilders/etree.py
@@ -1,4 +1,6 @@
 from __future__ import absolute_import, division, unicode_literals
+# pylint:disable=protected-access
+
 from six import text_type
 
 import re
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index abcd4b1d..2a69769b 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -10,6 +10,7 @@
 """
 
 from __future__ import absolute_import, division, unicode_literals
+# pylint:disable=protected-access
 
 import warnings
 import re

From a2b8c110cd0c5c7d60573f2a86d951cabefc516b Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon 
Date: Fri, 4 Dec 2015 02:13:53 +0000
Subject: [PATCH 27/27] Add prospector/pylint config for the sake of Landscape.

---
 .prospector.yaml | 21 +++++++++++++++++++++
 .pylintrc        | 10 ++++++++++
 2 files changed, 31 insertions(+)
 create mode 100644 .prospector.yaml
 create mode 100644 .pylintrc

diff --git a/.prospector.yaml b/.prospector.yaml
new file mode 100644
index 00000000..7e8efe1a
--- /dev/null
+++ b/.prospector.yaml
@@ -0,0 +1,21 @@
+strictness: veryhigh
+doc-warnings: false
+test-warnings: false
+
+max-line-length: 139
+
+requirements:
+  - requirements.txt
+  - requirements-test.txt
+  - requirements-optional.txt
+
+ignore-paths:
+  - parse.py
+  - utils/
+
+python-targets:
+  - 2
+  - 3
+
+mccabe:
+  run: false
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 00000000..ea74d5db
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,10 @@
+[MASTER]
+ignore=tests
+
+[MESSAGES CONTROL]
+# The messages in the list below up to "fixme" should probably be fixed properly rather than disabled
+disable = redefined-builtin,attribute-defined-outside-init,anomalous-backslash-in-string,no-self-use,redefined-outer-name,bad-continuation,wrong-import-order,superfluous-parens,no-member,duplicate-code,super-init-not-called,abstract-method,property-on-old-class,wrong-import-position,no-name-in-module,no-init,bad-mcs-classmethod-argument,bad-classmethod-argument,fixme,invalid-name,import-error,too-few-public-methods,too-many-ancestors,too-many-arguments,too-many-boolean-expressions,too-many-branches,too-many-instance-attributes,too-many-locals,too-many-lines,too-many-public-methods,too-many-return-statements,too-many-statements,missing-docstring,line-too-long,locally-disabled,locally-enabled,bad-builtin,deprecated-lambda
+
+[FORMAT]
+max-line-length=139
+single-line-if-stmt=no