diff --git a/.travis.yml b/.travis.yml
index a48d27f5..66d92deb 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -10,6 +10,18 @@ env:
   - USE_OPTIONAL=true
   - USE_OPTIONAL=false
 
+matrix:  # swap the plain USE_OPTIONAL=false jobs on 2.7 and 3.3 for lint-enabled ones
+  exclude:  # combinations removed from the python x env build matrix
+    - python: "2.7"
+      env: USE_OPTIONAL=false
+    - python: "3.3"
+      env: USE_OPTIONAL=false
+  include:  # same combinations re-added with FLAKE=true, which gates the flake8 steps
+    - python: "2.7"
+      env: USE_OPTIONAL=false FLAKE=true
+    - python: "3.3"
+      env: USE_OPTIONAL=false FLAKE=true
+
 before_install:
   - git submodule update --init --recursive
 
@@ -19,9 +31,12 @@ install:
   - if [[ $TRAVIS_PYTHON_VERSION != 3.* && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-2.txt --use-mirrors; fi
   - if [[ $TRAVIS_PYTHON_VERSION == 3.* && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-3.txt --use-mirrors; fi
   - if [[ $TRAVIS_PYTHON_VERSION != "pypy" && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-cpython.txt --use-mirrors; fi
+  - if [[ $FLAKE == "true" ]]; then pip install --use-mirrors flake8; fi  # flake8 is only installed on FLAKE=true jobs
 
 script:
   - nosetests
+  - if [[ $FLAKE == "true" ]]; then find html5lib/ -name '*.py' -and -not -name 'constants.py' -print0 | xargs -0 flake8 --ignore=E501; fi  # lint every module except constants.py, ignoring E501 (line too long)
+  - if [[ $FLAKE == "true" ]]; then flake8 --max-line-length=99 --ignore=E126 html5lib/constants.py; fi  # constants.py is linted separately, with a 99-column limit and E126 ignored
 
 after_script:
   - python debug-info.py
diff --git a/html5lib/__init__.py b/html5lib/__init__.py
index 528da9fa..10e2b74c 100644
--- a/html5lib/__init__.py
+++ b/html5lib/__init__.py
@@ -18,4 +18,6 @@
 from .treewalkers import getTreeWalker
 from .serializer import serialize
 
+__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
+           "getTreeWalker", "serialize"]  # names exported by `from html5lib import *`
 __version__ = "1.0b1"
diff --git a/html5lib/constants.py b/html5lib/constants.py
index 952fef41..1866dd78 100644
--- a/html5lib/constants.py
+++ b/html5lib/constants.py
@@ -1,300 +1,301 @@
 from __future__ import absolute_import, division, unicode_literals
 
-import string, gettext
+import string
+import gettext
 _ = gettext.gettext
 
 EOF = None
 
 E = {
     "null-character":
-       _("Null character in input stream, replaced with U+FFFD."),
+        _("Null character in input stream, replaced with U+FFFD."),
     "invalid-codepoint":
-       _("Invalid codepoint in stream."),
+        _("Invalid codepoint in stream."),
     "incorrectly-placed-solidus":
-       _("Solidus (/) incorrectly placed in tag."),
+        _("Solidus (/) incorrectly placed in tag."),
     "incorrect-cr-newline-entity":
-       _("Incorrect CR newline entity, replaced with LF."),
+        _("Incorrect CR newline entity, replaced with LF."),
     "illegal-windows-1252-entity":
-       _("Entity used with illegal number (windows-1252 reference)."),
+        _("Entity used with illegal number (windows-1252 reference)."),
     "cant-convert-numeric-entity":
-       _("Numeric entity couldn't be converted to character "
-         "(codepoint U+%(charAsInt)08x)."),
+        _("Numeric entity couldn't be converted to character "
+          "(codepoint U+%(charAsInt)08x)."),
     "illegal-codepoint-for-numeric-entity":
-       _("Numeric entity represents an illegal codepoint: "
-         "U+%(charAsInt)08x."),
+        _("Numeric entity represents an illegal codepoint: "
+          "U+%(charAsInt)08x."),
     "numeric-entity-without-semicolon":
-       _("Numeric entity didn't end with ';'."),
+        _("Numeric entity didn't end with ';'."),
     "expected-numeric-entity-but-got-eof":
-       _("Numeric entity expected. Got end of file instead."),
+        _("Numeric entity expected. Got end of file instead."),
     "expected-numeric-entity":
-       _("Numeric entity expected but none found."),
+        _("Numeric entity expected but none found."),
     "named-entity-without-semicolon":
-       _("Named entity didn't end with ';'."),
+        _("Named entity didn't end with ';'."),
     "expected-named-entity":
-       _("Named entity expected. Got none."),
+        _("Named entity expected. Got none."),
     "attributes-in-end-tag":
-       _("End tag contains unexpected attributes."),
+        _("End tag contains unexpected attributes."),
     'self-closing-flag-on-end-tag':
         _("End tag contains unexpected self-closing flag."),
     "expected-tag-name-but-got-right-bracket":
-       _("Expected tag name. Got '>' instead."),
+        _("Expected tag name. Got '>' instead."),
     "expected-tag-name-but-got-question-mark":
-       _("Expected tag name. Got '?' instead. (HTML doesn't "
-         "support processing instructions.)"),
+        _("Expected tag name. Got '?' instead. (HTML doesn't "
+          "support processing instructions.)"),
     "expected-tag-name":
-       _("Expected tag name. Got something else instead"),
+        _("Expected tag name. Got something else instead"),
     "expected-closing-tag-but-got-right-bracket":
-       _("Expected closing tag. Got '>' instead. Ignoring '</>'."),
+        _("Expected closing tag. Got '>' instead. Ignoring '</>'."),
     "expected-closing-tag-but-got-eof":
-       _("Expected closing tag. Unexpected end of file."),
+        _("Expected closing tag. Unexpected end of file."),
     "expected-closing-tag-but-got-char":
-       _("Expected closing tag. Unexpected character '%(data)s' found."),
+        _("Expected closing tag. Unexpected character '%(data)s' found."),
     "eof-in-tag-name":
-       _("Unexpected end of file in the tag name."),
+        _("Unexpected end of file in the tag name."),
     "expected-attribute-name-but-got-eof":
-       _("Unexpected end of file. Expected attribute name instead."),
+        _("Unexpected end of file. Expected attribute name instead."),
     "eof-in-attribute-name":
-       _("Unexpected end of file in attribute name."),
+        _("Unexpected end of file in attribute name."),
     "invalid-character-in-attribute-name":
         _("Invalid character in attribute name"),
     "duplicate-attribute":
-       _("Dropped duplicate attribute on tag."),
+        _("Dropped duplicate attribute on tag."),
     "expected-end-of-tag-name-but-got-eof":
-       _("Unexpected end of file. Expected = or end of tag."),
+        _("Unexpected end of file. Expected = or end of tag."),
     "expected-attribute-value-but-got-eof":
-       _("Unexpected end of file. Expected attribute value."),
+        _("Unexpected end of file. Expected attribute value."),
     "expected-attribute-value-but-got-right-bracket":
-       _("Expected attribute value. Got '>' instead."),
+        _("Expected attribute value. Got '>' instead."),
     'equals-in-unquoted-attribute-value':
         _("Unexpected = in unquoted attribute"),
     'unexpected-character-in-unquoted-attribute-value':
         _("Unexpected character in unquoted attribute"),
     "invalid-character-after-attribute-name":
-       _("Unexpected character after attribute name."),
+        _("Unexpected character after attribute name."),
     "unexpected-character-after-attribute-value":
-       _("Unexpected character after attribute value."),
+        _("Unexpected character after attribute value."),
     "eof-in-attribute-value-double-quote":
-       _("Unexpected end of file in attribute value (\")."),
+        _("Unexpected end of file in attribute value (\")."),
     "eof-in-attribute-value-single-quote":
-       _("Unexpected end of file in attribute value (')."),
+        _("Unexpected end of file in attribute value (')."),
     "eof-in-attribute-value-no-quotes":
-       _("Unexpected end of file in attribute value."),
+        _("Unexpected end of file in attribute value."),
     "unexpected-EOF-after-solidus-in-tag":
         _("Unexpected end of file in tag. Expected >"),
     "unexpected-character-after-solidus-in-tag":
         _("Unexpected character after / in tag. Expected >"),
     "expected-dashes-or-doctype":
-       _("Expected '--' or 'DOCTYPE'. Not found."),
+        _("Expected '--' or 'DOCTYPE'. Not found."),
     "unexpected-bang-after-double-dash-in-comment":
         _("Unexpected ! after -- in comment"),
     "unexpected-space-after-double-dash-in-comment":
         _("Unexpected space after -- in comment"),
     "incorrect-comment":
-       _("Incorrect comment."),
+        _("Incorrect comment."),
     "eof-in-comment":
-       _("Unexpected end of file in comment."),
+        _("Unexpected end of file in comment."),
     "eof-in-comment-end-dash":
-       _("Unexpected end of file in comment (-)"),
+        _("Unexpected end of file in comment (-)"),
     "unexpected-dash-after-double-dash-in-comment":
-       _("Unexpected '-' after '--' found in comment."),
+        _("Unexpected '-' after '--' found in comment."),
     "eof-in-comment-double-dash":
-       _("Unexpected end of file in comment (--)."),
+        _("Unexpected end of file in comment (--)."),
     "eof-in-comment-end-space-state":
-       _("Unexpected end of file in comment."),
+        _("Unexpected end of file in comment."),
     "eof-in-comment-end-bang-state":
-       _("Unexpected end of file in comment."),
+        _("Unexpected end of file in comment."),
     "unexpected-char-in-comment":
-       _("Unexpected character in comment found."),
+        _("Unexpected character in comment found."),
     "need-space-after-doctype":
-       _("No space after literal string 'DOCTYPE'."),
+        _("No space after literal string 'DOCTYPE'."),
     "expected-doctype-name-but-got-right-bracket":
-       _("Unexpected > character. Expected DOCTYPE name."),
+        _("Unexpected > character. Expected DOCTYPE name."),
     "expected-doctype-name-but-got-eof":
-       _("Unexpected end of file. Expected DOCTYPE name."),
+        _("Unexpected end of file. Expected DOCTYPE name."),
     "eof-in-doctype-name":
-       _("Unexpected end of file in DOCTYPE name."),
+        _("Unexpected end of file in DOCTYPE name."),
     "eof-in-doctype":
-       _("Unexpected end of file in DOCTYPE."),
+        _("Unexpected end of file in DOCTYPE."),
     "expected-space-or-right-bracket-in-doctype":
-       _("Expected space or '>'. Got '%(data)s'"),
+        _("Expected space or '>'. Got '%(data)s'"),
     "unexpected-end-of-doctype":
-       _("Unexpected end of DOCTYPE."),
+        _("Unexpected end of DOCTYPE."),
     "unexpected-char-in-doctype":
-       _("Unexpected character in DOCTYPE."),
+        _("Unexpected character in DOCTYPE."),
     "eof-in-innerhtml":
-       _("XXX innerHTML EOF"),
+        _("XXX innerHTML EOF"),
     "unexpected-doctype":
-       _("Unexpected DOCTYPE. Ignored."),
+        _("Unexpected DOCTYPE. Ignored."),
     "non-html-root":
-       _("html needs to be the first start tag."),
+        _("html needs to be the first start tag."),
     "expected-doctype-but-got-eof":
-       _("Unexpected End of file. Expected DOCTYPE."),
+        _("Unexpected End of file. Expected DOCTYPE."),
     "unknown-doctype":
-       _("Erroneous DOCTYPE."),
+        _("Erroneous DOCTYPE."),
     "expected-doctype-but-got-chars":
-       _("Unexpected non-space characters. Expected DOCTYPE."),
+        _("Unexpected non-space characters. Expected DOCTYPE."),
     "expected-doctype-but-got-start-tag":
-       _("Unexpected start tag (%(name)s). Expected DOCTYPE."),
+        _("Unexpected start tag (%(name)s). Expected DOCTYPE."),
     "expected-doctype-but-got-end-tag":
-       _("Unexpected end tag (%(name)s). Expected DOCTYPE."),
+        _("Unexpected end tag (%(name)s). Expected DOCTYPE."),
     "end-tag-after-implied-root":
-       _("Unexpected end tag (%(name)s) after the (implied) root element."),
+        _("Unexpected end tag (%(name)s) after the (implied) root element."),
     "expected-named-closing-tag-but-got-eof":
-       _("Unexpected end of file. Expected end tag (%(name)s)."),
+        _("Unexpected end of file. Expected end tag (%(name)s)."),
     "two-heads-are-not-better-than-one":
-       _("Unexpected start tag head in existing head. Ignored."),
+        _("Unexpected start tag head in existing head. Ignored."),
     "unexpected-end-tag":
-       _("Unexpected end tag (%(name)s). Ignored."),
+        _("Unexpected end tag (%(name)s). Ignored."),
     "unexpected-start-tag-out-of-my-head":
-       _("Unexpected start tag (%(name)s) that can be in head. Moved."),
+        _("Unexpected start tag (%(name)s) that can be in head. Moved."),
     "unexpected-start-tag":
-       _("Unexpected start tag (%(name)s)."),
+        _("Unexpected start tag (%(name)s)."),
     "missing-end-tag":
-       _("Missing end tag (%(name)s)."),
+        _("Missing end tag (%(name)s)."),
     "missing-end-tags":
-       _("Missing end tags (%(name)s)."),
+        _("Missing end tags (%(name)s)."),
     "unexpected-start-tag-implies-end-tag":
-       _("Unexpected start tag (%(startName)s) "
-         "implies end tag (%(endName)s)."),
+        _("Unexpected start tag (%(startName)s) "
+          "implies end tag (%(endName)s)."),
     "unexpected-start-tag-treated-as":
-       _("Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
+        _("Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
     "deprecated-tag":
-       _("Unexpected start tag %(name)s. Don't use it!"),
+        _("Unexpected start tag %(name)s. Don't use it!"),
     "unexpected-start-tag-ignored":
-       _("Unexpected start tag %(name)s. Ignored."),
+        _("Unexpected start tag %(name)s. Ignored."),
     "expected-one-end-tag-but-got-another":
-       _("Unexpected end tag (%(gotName)s). "
-         "Missing end tag (%(expectedName)s)."),
+        _("Unexpected end tag (%(gotName)s). "
+          "Missing end tag (%(expectedName)s)."),
     "end-tag-too-early":
-       _("End tag (%(name)s) seen too early. Expected other end tag."),
+        _("End tag (%(name)s) seen too early. Expected other end tag."),
     "end-tag-too-early-named":
-       _("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
+        _("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
     "end-tag-too-early-ignored":
-       _("End tag (%(name)s) seen too early. Ignored."),
+        _("End tag (%(name)s) seen too early. Ignored."),
     "adoption-agency-1.1":
-       _("End tag (%(name)s) violates step 1, "
-         "paragraph 1 of the adoption agency algorithm."),
+        _("End tag (%(name)s) violates step 1, "
+          "paragraph 1 of the adoption agency algorithm."),
     "adoption-agency-1.2":
-       _("End tag (%(name)s) violates step 1, "
-         "paragraph 2 of the adoption agency algorithm."),
+        _("End tag (%(name)s) violates step 1, "
+          "paragraph 2 of the adoption agency algorithm."),
     "adoption-agency-1.3":
-       _("End tag (%(name)s) violates step 1, "
-         "paragraph 3 of the adoption agency algorithm."),
+        _("End tag (%(name)s) violates step 1, "
+          "paragraph 3 of the adoption agency algorithm."),
     "adoption-agency-4.4":
-       _("End tag (%(name)s) violates step 4, "
-         "paragraph 4 of the adoption agency algorithm."),
+        _("End tag (%(name)s) violates step 4, "
+          "paragraph 4 of the adoption agency algorithm."),
     "unexpected-end-tag-treated-as":
-       _("Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
+        _("Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
     "no-end-tag":
-       _("This element (%(name)s) has no end tag."),
+        _("This element (%(name)s) has no end tag."),
     "unexpected-implied-end-tag-in-table":
-       _("Unexpected implied end tag (%(name)s) in the table phase."),
+        _("Unexpected implied end tag (%(name)s) in the table phase."),
     "unexpected-implied-end-tag-in-table-body":
-       _("Unexpected implied end tag (%(name)s) in the table body phase."),
+        _("Unexpected implied end tag (%(name)s) in the table body phase."),
     "unexpected-char-implies-table-voodoo":
-       _("Unexpected non-space characters in "
-         "table context caused voodoo mode."),
+        _("Unexpected non-space characters in "
+          "table context caused voodoo mode."),
     "unexpected-hidden-input-in-table":
-       _("Unexpected input with type hidden in table context."),
+        _("Unexpected input with type hidden in table context."),
     "unexpected-form-in-table":
-       _("Unexpected form in table context."),
+        _("Unexpected form in table context."),
     "unexpected-start-tag-implies-table-voodoo":
-       _("Unexpected start tag (%(name)s) in "
-         "table context caused voodoo mode."),
+        _("Unexpected start tag (%(name)s) in "
+          "table context caused voodoo mode."),
     "unexpected-end-tag-implies-table-voodoo":
-       _("Unexpected end tag (%(name)s) in "
-         "table context caused voodoo mode."),
+        _("Unexpected end tag (%(name)s) in "
+          "table context caused voodoo mode."),
     "unexpected-cell-in-table-body":
-       _("Unexpected table cell start tag (%(name)s) "
-         "in the table body phase."),
+        _("Unexpected table cell start tag (%(name)s) "
+          "in the table body phase."),
     "unexpected-cell-end-tag":
-       _("Got table cell end tag (%(name)s) "
-         "while required end tags are missing."),
+        _("Got table cell end tag (%(name)s) "
+          "while required end tags are missing."),
     "unexpected-end-tag-in-table-body":
-       _("Unexpected end tag (%(name)s) in the table body phase. Ignored."),
+        _("Unexpected end tag (%(name)s) in the table body phase. Ignored."),
     "unexpected-implied-end-tag-in-table-row":
-       _("Unexpected implied end tag (%(name)s) in the table row phase."),
+        _("Unexpected implied end tag (%(name)s) in the table row phase."),
     "unexpected-end-tag-in-table-row":
-       _("Unexpected end tag (%(name)s) in the table row phase. Ignored."),
+        _("Unexpected end tag (%(name)s) in the table row phase. Ignored."),
     "unexpected-select-in-select":
-       _("Unexpected select start tag in the select phase "
-         "treated as select end tag."),
+        _("Unexpected select start tag in the select phase "
+          "treated as select end tag."),
     "unexpected-input-in-select":
-       _("Unexpected input start tag in the select phase."),
+        _("Unexpected input start tag in the select phase."),
     "unexpected-start-tag-in-select":
-       _("Unexpected start tag token (%(name)s in the select phase. "
-         "Ignored."),
+        _("Unexpected start tag token (%(name)s) in the select phase. "
+          "Ignored."),
     "unexpected-end-tag-in-select":
-       _("Unexpected end tag (%(name)s) in the select phase. Ignored."),
+        _("Unexpected end tag (%(name)s) in the select phase. Ignored."),
     "unexpected-table-element-start-tag-in-select-in-table":
-       _("Unexpected table element start tag (%(name)s) in the select in table phase."),
+        _("Unexpected table element start tag (%(name)s) in the select in table phase."),
     "unexpected-table-element-end-tag-in-select-in-table":
-       _("Unexpected table element end tag (%(name)s) in the select in table phase."),
+        _("Unexpected table element end tag (%(name)s) in the select in table phase."),
     "unexpected-char-after-body":
-       _("Unexpected non-space characters in the after body phase."),
+        _("Unexpected non-space characters in the after body phase."),
     "unexpected-start-tag-after-body":
-       _("Unexpected start tag token (%(name)s)"
-         " in the after body phase."),
+        _("Unexpected start tag token (%(name)s)"
+          " in the after body phase."),
     "unexpected-end-tag-after-body":
-       _("Unexpected end tag token (%(name)s)"
-         " in the after body phase."),
+        _("Unexpected end tag token (%(name)s)"
+          " in the after body phase."),
     "unexpected-char-in-frameset":
-       _("Unexpected characters in the frameset phase. Characters ignored."),
+        _("Unexpected characters in the frameset phase. Characters ignored."),
     "unexpected-start-tag-in-frameset":
-       _("Unexpected start tag token (%(name)s)"
-         " in the frameset phase. Ignored."),
+        _("Unexpected start tag token (%(name)s)"
+          " in the frameset phase. Ignored."),
     "unexpected-frameset-in-frameset-innerhtml":
-       _("Unexpected end tag token (frameset) "
-         "in the frameset phase (innerHTML)."),
+        _("Unexpected end tag token (frameset) "
+          "in the frameset phase (innerHTML)."),
     "unexpected-end-tag-in-frameset":
-       _("Unexpected end tag token (%(name)s)"
-         " in the frameset phase. Ignored."),
+        _("Unexpected end tag token (%(name)s)"
+          " in the frameset phase. Ignored."),
     "unexpected-char-after-frameset":
-       _("Unexpected non-space characters in the "
-         "after frameset phase. Ignored."),
+        _("Unexpected non-space characters in the "
+          "after frameset phase. Ignored."),
     "unexpected-start-tag-after-frameset":
-       _("Unexpected start tag (%(name)s)"
-         " in the after frameset phase. Ignored."),
+        _("Unexpected start tag (%(name)s)"
+          " in the after frameset phase. Ignored."),
     "unexpected-end-tag-after-frameset":
-       _("Unexpected end tag (%(name)s)"
-         " in the after frameset phase. Ignored."),
+        _("Unexpected end tag (%(name)s)"
+          " in the after frameset phase. Ignored."),
     "unexpected-end-tag-after-body-innerhtml":
-       _("Unexpected end tag after body(innerHtml)"),
+        _("Unexpected end tag after body(innerHtml)"),
     "expected-eof-but-got-char":
-       _("Unexpected non-space characters. Expected end of file."),
+        _("Unexpected non-space characters. Expected end of file."),
     "expected-eof-but-got-start-tag":
-       _("Unexpected start tag (%(name)s)"
-         ". Expected end of file."),
+        _("Unexpected start tag (%(name)s)"
+          ". Expected end of file."),
     "expected-eof-but-got-end-tag":
-       _("Unexpected end tag (%(name)s)"
-         ". Expected end of file."),
+        _("Unexpected end tag (%(name)s)"
+          ". Expected end of file."),
     "eof-in-table":
-       _("Unexpected end of file. Expected table content."),
+        _("Unexpected end of file. Expected table content."),
     "eof-in-select":
-       _("Unexpected end of file. Expected select content."),
+        _("Unexpected end of file. Expected select content."),
     "eof-in-frameset":
-       _("Unexpected end of file. Expected frameset content."),
+        _("Unexpected end of file. Expected frameset content."),
     "eof-in-script-in-script":
-       _("Unexpected end of file. Expected script content."),
+        _("Unexpected end of file. Expected script content."),
     "eof-in-foreign-lands":
-       _("Unexpected end of file. Expected foreign content"),
+        _("Unexpected end of file. Expected foreign content"),
     "non-void-element-with-trailing-solidus":
-       _("Trailing solidus not allowed on element %(name)s"),
+        _("Trailing solidus not allowed on element %(name)s"),
     "unexpected-html-element-in-foreign-content":
-       _("Element %(name)s not allowed in a non-html context"),
+        _("Element %(name)s not allowed in a non-html context"),
     "unexpected-end-tag-before-html":
         _("Unexpected end tag (%(name)s) before html."),
     "XXX-undefined-error":
-        ("Undefined error (this sucks and should be fixed)"),
+        _("Undefined error (this sucks and should be fixed)"),
 }
 
 namespaces = {
-    "html":"http://www.w3.org/1999/xhtml",
-    "mathml":"http://www.w3.org/1998/Math/MathML",
-    "svg":"http://www.w3.org/2000/svg",
-    "xlink":"http://www.w3.org/1999/xlink",
-    "xml":"http://www.w3.org/XML/1998/namespace",
-    "xmlns":"http://www.w3.org/2000/xmlns/"
+    "html": "http://www.w3.org/1999/xhtml",
+    "mathml": "http://www.w3.org/1998/Math/MathML",
+    "svg": "http://www.w3.org/2000/svg",
+    "xlink": "http://www.w3.org/1999/xlink",
+    "xml": "http://www.w3.org/XML/1998/namespace",
+    "xmlns": "http://www.w3.org/2000/xmlns/"
 }
 
 scopingElements = frozenset((
@@ -454,8 +455,8 @@
 digits = frozenset(string.digits)
 hexDigits = frozenset(string.hexdigits)
 
-asciiUpper2Lower = dict([(ord(c),ord(c.lower()))
-    for c in string.ascii_uppercase])
+asciiUpper2Lower = dict([(ord(c), ord(c.lower()))
+                         for c in string.ascii_uppercase])
 
 # Heading elements need to be ordered
 headingElements = (
@@ -501,8 +502,8 @@
     "": frozenset(("irrelevant",)),
     "style": frozenset(("scoped",)),
     "img": frozenset(("ismap",)),
-    "audio": frozenset(("autoplay","controls")),
-    "video": frozenset(("autoplay","controls")),
+    "audio": frozenset(("autoplay", "controls")),
+    "video": frozenset(("autoplay", "controls")),
     "script": frozenset(("defer", "async")),
     "details": frozenset(("open",)),
     "datagrid": frozenset(("multiple", "disabled")),
@@ -521,38 +522,38 @@
 # entitiesWindows1252 has to be _ordered_ and needs to have an index. It
 # therefore can't be a frozenset.
 entitiesWindows1252 = (
-    8364,  # 0x80  0x20AC  EURO SIGN
-    65533, # 0x81          UNDEFINED
-    8218,  # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
-    402,   # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
-    8222,  # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
-    8230,  # 0x85  0x2026  HORIZONTAL ELLIPSIS
-    8224,  # 0x86  0x2020  DAGGER
-    8225,  # 0x87  0x2021  DOUBLE DAGGER
-    710,   # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
-    8240,  # 0x89  0x2030  PER MILLE SIGN
-    352,   # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
-    8249,  # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    338,   # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
-    65533, # 0x8D          UNDEFINED
-    381,   # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
-    65533, # 0x8F          UNDEFINED
-    65533, # 0x90          UNDEFINED
-    8216,  # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
-    8217,  # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
-    8220,  # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
-    8221,  # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
-    8226,  # 0x95  0x2022  BULLET
-    8211,  # 0x96  0x2013  EN DASH
-    8212,  # 0x97  0x2014  EM DASH
-    732,   # 0x98  0x02DC  SMALL TILDE
-    8482,  # 0x99  0x2122  TRADE MARK SIGN
-    353,   # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
-    8250,  # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    339,   # 0x9C  0x0153  LATIN SMALL LIGATURE OE
-    65533, # 0x9D          UNDEFINED
-    382,   # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
-    376    # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
+    8364,   # 0x80  0x20AC  EURO SIGN
+    65533,  # 0x81          UNDEFINED
+    8218,   # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
+    402,    # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
+    8222,   # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
+    8230,   # 0x85  0x2026  HORIZONTAL ELLIPSIS
+    8224,   # 0x86  0x2020  DAGGER
+    8225,   # 0x87  0x2021  DOUBLE DAGGER
+    710,    # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
+    8240,   # 0x89  0x2030  PER MILLE SIGN
+    352,    # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
+    8249,   # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    338,    # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
+    65533,  # 0x8D          UNDEFINED
+    381,    # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
+    65533,  # 0x8F          UNDEFINED
+    65533,  # 0x90          UNDEFINED
+    8216,   # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
+    8217,   # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
+    8220,   # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
+    8221,   # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
+    8226,   # 0x95  0x2022  BULLET
+    8211,   # 0x96  0x2013  EN DASH
+    8212,   # 0x97  0x2014  EM DASH
+    732,    # 0x98  0x02DC  SMALL TILDE
+    8482,   # 0x99  0x2122  TRADE MARK SIGN
+    353,    # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
+    8250,   # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    339,    # 0x9C  0x0153  LATIN SMALL LIGATURE OE
+    65533,  # 0x9D          UNDEFINED
+    382,    # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
+    376     # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
 )
 
 xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;'))
@@ -2792,41 +2793,41 @@
 }
 
 replacementCharacters = {
-    0x0:"\uFFFD",
-    0x0d:"\u000D",
-    0x80:"\u20AC",
-    0x81:"\u0081",
-    0x81:"\u0081",
-    0x82:"\u201A",
-    0x83:"\u0192",
-    0x84:"\u201E",
-    0x85:"\u2026",
-    0x86:"\u2020",
-    0x87:"\u2021",
-    0x88:"\u02C6",
-    0x89:"\u2030",
-    0x8A:"\u0160",
-    0x8B:"\u2039",
-    0x8C:"\u0152",
-    0x8D:"\u008D",
-    0x8E:"\u017D",
-    0x8F:"\u008F",
-    0x90:"\u0090",
-    0x91:"\u2018",
-    0x92:"\u2019",
-    0x93:"\u201C",
-    0x94:"\u201D",
-    0x95:"\u2022",
-    0x96:"\u2013",
-    0x97:"\u2014",
-    0x98:"\u02DC",
-    0x99:"\u2122",
-    0x9A:"\u0161",
-    0x9B:"\u203A",
-    0x9C:"\u0153",
-    0x9D:"\u009D",
-    0x9E:"\u017E",
-    0x9F:"\u0178",
+    # 0x80-0x9F follow windows-1252; bytes it leaves undefined map to themselves.
+    0x0: "\uFFFD",
+    0x0d: "\u000D",
+    0x80: "\u20AC",
+    0x81: "\u0081",
+    0x82: "\u201A",
+    0x83: "\u0192",
+    0x84: "\u201E",
+    0x85: "\u2026",
+    0x86: "\u2020",
+    0x87: "\u2021",
+    0x88: "\u02C6",
+    0x89: "\u2030",
+    0x8A: "\u0160",
+    0x8B: "\u2039",
+    0x8C: "\u0152",
+    0x8D: "\u008D",
+    0x8E: "\u017D",
+    0x8F: "\u008F",
+    0x90: "\u0090",
+    0x91: "\u2018",
+    0x92: "\u2019",
+    0x93: "\u201C",
+    0x94: "\u201D",
+    0x95: "\u2022",
+    0x96: "\u2013",
+    0x97: "\u2014",
+    0x98: "\u02DC",
+    0x99: "\u2122",
+    0x9A: "\u0161",
+    0x9B: "\u203A",
+    0x9C: "\u0153",
+    0x9D: "\u009D",
+    0x9E: "\u017E",
+    0x9F: "\u0178",
 }
 
 encodings = {
@@ -3059,25 +3060,27 @@
     'x-x-big5': 'big5'}
 
 tokenTypes = {
-    "Doctype":0,
-    "Characters":1,
-    "SpaceCharacters":2,
-    "StartTag":3,
-    "EndTag":4,
-    "EmptyTag":5,
-    "Comment":6,
-    "ParseError":7
+    "Doctype": 0,
+    "Characters": 1,
+    "SpaceCharacters": 2,
+    "StartTag": 3,
+    "EndTag": 4,
+    "EmptyTag": 5,
+    "Comment": 6,
+    "ParseError": 7
 }
 
 tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],
                            tokenTypes["EmptyTag"]))
 
 
-prefixes = dict([(v,k) for k,v in namespaces.items()])
+prefixes = dict([(v, k) for k, v in namespaces.items()])
 prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
 
+
 class DataLossWarning(UserWarning):
     pass
 
+
 class ReparseException(Exception):
     pass
diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py
index 65a3e902..ca33b70b 100644
--- a/html5lib/filters/inject_meta_charset.py
+++ b/html5lib/filters/inject_meta_charset.py
@@ -2,6 +2,7 @@
 
 from . import _base
 
+
 class Filter(_base.Filter):
     def __init__(self, source, encoding):
         _base.Filter.__init__(self, source)
@@ -20,21 +21,21 @@ def __iter__(self):
 
             elif type == "EmptyTag":
                 if token["name"].lower() == "meta":
-                   # replace charset with actual encoding
-                   has_http_equiv_content_type = False
-                   for (namespace,name),value in token["data"].items():
-                       if namespace != None:
-                           continue
-                       elif name.lower() == 'charset':
-                          token["data"][(namespace,name)] = self.encoding
-                          meta_found = True
-                          break
-                       elif name == 'http-equiv' and value.lower() == 'content-type':
-                           has_http_equiv_content_type = True
-                   else:
-                       if has_http_equiv_content_type and (None, "content") in token["data"]:
-                           token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
-                           meta_found = True
+                    # replace charset with actual encoding
+                    has_http_equiv_content_type = False
+                    for (namespace, name), value in token["data"].items():
+                        if namespace is not None:
+                            continue
+                        elif name.lower() == 'charset':
+                            token["data"][(namespace, name)] = self.encoding
+                            meta_found = True
+                            break
+                        elif name == 'http-equiv' and value.lower() == 'content-type':
+                            has_http_equiv_content_type = True
+                    else:
+                        if has_http_equiv_content_type and (None, "content") in token["data"]:
+                            token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
+                            meta_found = True
 
                 elif token["name"].lower() == "head" and not meta_found:
                     # insert meta into empty head
diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py
index bf98708d..d6f37cf4 100644
--- a/html5lib/filters/lint.py
+++ b/html5lib/filters/lint.py
@@ -9,7 +9,10 @@
 from html5lib.constants import spaceCharacters
 spaceCharacters = "".join(spaceCharacters)
 
-class LintError(Exception): pass
+
+class LintError(Exception):
+    pass
+
 
 class Filter(_base.Filter):
     def __iter__(self):
diff --git a/html5lib/filters/optionaltags.py b/html5lib/filters/optionaltags.py
index 39d93ea5..fefe0b30 100644
--- a/html5lib/filters/optionaltags.py
+++ b/html5lib/filters/optionaltags.py
@@ -2,6 +2,7 @@
 
 from . import _base
 
+
 class Filter(_base.Filter):
     def slider(self):
         previous1 = previous2 = None
@@ -17,7 +18,7 @@ def __iter__(self):
             type = token["type"]
             if type == "StartTag":
                 if (token["data"] or
-                    not self.is_optional_start(token["name"], previous, next)):
+                        not self.is_optional_start(token["name"], previous, next)):
                     yield token
             elif type == "EndTag":
                 if not self.is_optional_end(token["name"], next):
@@ -75,7 +76,7 @@ def is_optional_start(self, tagname, previous, next):
                 # omit the thead and tfoot elements' end tag when they are
                 # immediately followed by a tbody element. See is_optional_end.
                 if previous and previous['type'] == 'EndTag' and \
-                  previous['name'] in ('tbody','thead','tfoot'):
+                        previous['name'] in ('tbody', 'thead', 'tfoot'):
                     return False
                 return next["name"] == 'tr'
             else:
diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py
index adaee595..2692023d 100644
--- a/html5lib/filters/sanitizer.py
+++ b/html5lib/filters/sanitizer.py
@@ -3,8 +3,10 @@
 from . import _base
 from html5lib.sanitizer import HTMLSanitizerMixin
 
+
 class Filter(_base.Filter, HTMLSanitizerMixin):
     def __iter__(self):
         for token in _base.Filter.__iter__(self):
             token = self.sanitize_token(token)
-            if token: yield token
+            if token:
+                yield token
diff --git a/html5lib/filters/whitespace.py b/html5lib/filters/whitespace.py
index c2b7fb12..1f309236 100644
--- a/html5lib/filters/whitespace.py
+++ b/html5lib/filters/whitespace.py
@@ -8,6 +8,7 @@
 
 SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
 
+
 class Filter(_base.Filter):
 
     spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
@@ -17,7 +18,7 @@ def __iter__(self):
         for token in _base.Filter.__iter__(self):
             type = token["type"]
             if type == "StartTag" \
-              and (preserve or token["name"] in self.spacePreserveElements):
+                    and (preserve or token["name"] in self.spacePreserveElements):
                 preserve += 1
 
             elif type == "EndTag" and preserve:
@@ -32,6 +33,6 @@ def __iter__(self):
 
             yield token
 
+
 def collapse_spaces(text):
     return SPACES_REGEX.sub(' ', text)
-
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 989691a4..dab175dd 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -1,7 +1,6 @@
 from __future__ import absolute_import, division, unicode_literals
 from six import with_metaclass
 
-import sys
 import types
 
 from . import inputstream
@@ -14,12 +13,13 @@
 from . import utils
 from . import constants
 from .constants import spaceCharacters, asciiUpper2Lower
-from .constants import formattingElements, specialElements
-from .constants import headingElements, tableInsertModeElements
-from .constants import cdataElements, rcdataElements, voidElements
-from .constants import tokenTypes, ReparseException, namespaces, spaceCharacters
+from .constants import specialElements
+from .constants import headingElements
+from .constants import cdataElements, rcdataElements
+from .constants import tokenTypes, ReparseException, namespaces
 from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
 
+
 def parse(doc, treebuilder="simpletree", encoding=None,
           namespaceHTMLElements=True):
     """Parse a string or file-like object into a tree"""
@@ -27,30 +27,33 @@ def parse(doc, treebuilder="simpletree", encoding=None,
     p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
     return p.parse(doc, encoding=encoding)
 
+
 def parseFragment(doc, container="div", treebuilder="simpletree", encoding=None,
                   namespaceHTMLElements=True):
     tb = treebuilders.getTreeBuilder(treebuilder)
     p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
     return p.parseFragment(doc, container=container, encoding=encoding)
 
+
 def method_decorator_metaclass(function):
     class Decorated(type):
         def __new__(meta, classname, bases, classDict):
             for attributeName, attribute in classDict.items():
-                if type(attribute) == types.FunctionType:
+                if isinstance(attribute, types.FunctionType):
                     attribute = function(attribute)
 
                 classDict[attributeName] = attribute
-            return  type.__new__(meta, classname, bases, classDict)
+            return type.__new__(meta, classname, bases, classDict)
     return Decorated
 
+
 class HTMLParser(object):
     """HTML parser. Generates a tree structure from a stream of (possibly
         malformed) HTML"""
 
-    def __init__(self, tree = simpletree.TreeBuilder,
-                 tokenizer = tokenizer.HTMLTokenizer, strict = False,
-                 namespaceHTMLElements = True, debug=False):
+    def __init__(self, tree=simpletree.TreeBuilder,
+                 tokenizer=tokenizer.HTMLTokenizer, strict=False,
+                 namespaceHTMLElements=True, debug=False):
         """
         strict - raise an exception when a parse error is encountered
 
@@ -88,14 +91,14 @@ def _parse(self, stream, innerHTML=False, container="div",
             try:
                 self.mainLoop()
                 break
-            except ReparseException as e:
+            except ReparseException:
                 self.reset()
 
     def reset(self):
         self.tree.reset()
         self.firstStartTag = False
         self.errors = []
-        self.log = [] #only used with debug mode
+        self.log = []  # only used with debug mode
         # "quirks" / "limited quirks" / "no quirks"
         self.compatMode = "no quirks"
 
@@ -127,7 +130,7 @@ def reset(self):
 
     def isHTMLIntegrationPoint(self, element):
         if (element.name == "annotation-xml" and
-            element.namespace == namespaces["mathml"]):
+                element.namespace == namespaces["mathml"]):
             return ("encoding" in element.attributes and
                     element.attributes["encoding"].translate(
                         asciiUpper2Lower) in
@@ -178,7 +181,7 @@ def mainLoop(self):
                     if type == CharactersToken:
                         new_token = phase.processCharacters(new_token)
                     elif type == SpaceCharactersToken:
-                         new_token= phase.processSpaceCharacters(new_token)
+                        new_token = phase.processSpaceCharacters(new_token)
                     elif type == StartTagToken:
                         new_token = phase.processStartTag(new_token)
                     elif type == EndTagToken:
@@ -189,10 +192,9 @@ def mainLoop(self):
                         new_token = phase.processDoctype(new_token)
 
             if (type == StartTagToken and token["selfClosing"]
-                and not token["selfClosingAcknowledged"]):
+                    and not token["selfClosingAcknowledged"]):
                 self.parseError("non-void-element-with-trailing-solidus",
-                                {"name":token["name"]})
-
+                                {"name": token["name"]})
 
         # When the loop finishes it's EOF
         reprocess = True
@@ -253,77 +255,77 @@ def normalizeToken(self, token):
         return token
 
     def adjustMathMLAttributes(self, token):
-        replacements = {"definitionurl":"definitionURL"}
-        for k,v in replacements.items():
+        replacements = {"definitionurl": "definitionURL"}
+        for k, v in replacements.items():
             if k in token["data"]:
                 token["data"][v] = token["data"][k]
                 del token["data"][k]
 
     def adjustSVGAttributes(self, token):
         replacements = {
-            "attributename":"attributeName",
-            "attributetype":"attributeType",
-            "basefrequency":"baseFrequency",
-            "baseprofile":"baseProfile",
-            "calcmode":"calcMode",
-            "clippathunits":"clipPathUnits",
-            "contentscripttype":"contentScriptType",
-            "contentstyletype":"contentStyleType",
-            "diffuseconstant":"diffuseConstant",
-            "edgemode":"edgeMode",
-            "externalresourcesrequired":"externalResourcesRequired",
-            "filterres":"filterRes",
-            "filterunits":"filterUnits",
-            "glyphref":"glyphRef",
-            "gradienttransform":"gradientTransform",
-            "gradientunits":"gradientUnits",
-            "kernelmatrix":"kernelMatrix",
-            "kernelunitlength":"kernelUnitLength",
-            "keypoints":"keyPoints",
-            "keysplines":"keySplines",
-            "keytimes":"keyTimes",
-            "lengthadjust":"lengthAdjust",
-            "limitingconeangle":"limitingConeAngle",
-            "markerheight":"markerHeight",
-            "markerunits":"markerUnits",
-            "markerwidth":"markerWidth",
-            "maskcontentunits":"maskContentUnits",
-            "maskunits":"maskUnits",
-            "numoctaves":"numOctaves",
-            "pathlength":"pathLength",
-            "patterncontentunits":"patternContentUnits",
-            "patterntransform":"patternTransform",
-            "patternunits":"patternUnits",
-            "pointsatx":"pointsAtX",
-            "pointsaty":"pointsAtY",
-            "pointsatz":"pointsAtZ",
-            "preservealpha":"preserveAlpha",
-            "preserveaspectratio":"preserveAspectRatio",
-            "primitiveunits":"primitiveUnits",
-            "refx":"refX",
-            "refy":"refY",
-            "repeatcount":"repeatCount",
-            "repeatdur":"repeatDur",
-            "requiredextensions":"requiredExtensions",
-            "requiredfeatures":"requiredFeatures",
-            "specularconstant":"specularConstant",
-            "specularexponent":"specularExponent",
-            "spreadmethod":"spreadMethod",
-            "startoffset":"startOffset",
-            "stddeviation":"stdDeviation",
-            "stitchtiles":"stitchTiles",
-            "surfacescale":"surfaceScale",
-            "systemlanguage":"systemLanguage",
-            "tablevalues":"tableValues",
-            "targetx":"targetX",
-            "targety":"targetY",
-            "textlength":"textLength",
-            "viewbox":"viewBox",
-            "viewtarget":"viewTarget",
-            "xchannelselector":"xChannelSelector",
-            "ychannelselector":"yChannelSelector",
-            "zoomandpan":"zoomAndPan"
-            }
+            "attributename": "attributeName",
+            "attributetype": "attributeType",
+            "basefrequency": "baseFrequency",
+            "baseprofile": "baseProfile",
+            "calcmode": "calcMode",
+            "clippathunits": "clipPathUnits",
+            "contentscripttype": "contentScriptType",
+            "contentstyletype": "contentStyleType",
+            "diffuseconstant": "diffuseConstant",
+            "edgemode": "edgeMode",
+            "externalresourcesrequired": "externalResourcesRequired",
+            "filterres": "filterRes",
+            "filterunits": "filterUnits",
+            "glyphref": "glyphRef",
+            "gradienttransform": "gradientTransform",
+            "gradientunits": "gradientUnits",
+            "kernelmatrix": "kernelMatrix",
+            "kernelunitlength": "kernelUnitLength",
+            "keypoints": "keyPoints",
+            "keysplines": "keySplines",
+            "keytimes": "keyTimes",
+            "lengthadjust": "lengthAdjust",
+            "limitingconeangle": "limitingConeAngle",
+            "markerheight": "markerHeight",
+            "markerunits": "markerUnits",
+            "markerwidth": "markerWidth",
+            "maskcontentunits": "maskContentUnits",
+            "maskunits": "maskUnits",
+            "numoctaves": "numOctaves",
+            "pathlength": "pathLength",
+            "patterncontentunits": "patternContentUnits",
+            "patterntransform": "patternTransform",
+            "patternunits": "patternUnits",
+            "pointsatx": "pointsAtX",
+            "pointsaty": "pointsAtY",
+            "pointsatz": "pointsAtZ",
+            "preservealpha": "preserveAlpha",
+            "preserveaspectratio": "preserveAspectRatio",
+            "primitiveunits": "primitiveUnits",
+            "refx": "refX",
+            "refy": "refY",
+            "repeatcount": "repeatCount",
+            "repeatdur": "repeatDur",
+            "requiredextensions": "requiredExtensions",
+            "requiredfeatures": "requiredFeatures",
+            "specularconstant": "specularConstant",
+            "specularexponent": "specularExponent",
+            "spreadmethod": "spreadMethod",
+            "startoffset": "startOffset",
+            "stddeviation": "stdDeviation",
+            "stitchtiles": "stitchTiles",
+            "surfacescale": "surfaceScale",
+            "systemlanguage": "systemLanguage",
+            "tablevalues": "tableValues",
+            "targetx": "targetX",
+            "targety": "targetY",
+            "textlength": "textLength",
+            "viewbox": "viewBox",
+            "viewtarget": "viewTarget",
+            "xchannelselector": "xChannelSelector",
+            "ychannelselector": "yChannelSelector",
+            "zoomandpan": "zoomAndPan"
+        }
         for originalName in list(token["data"].keys()):
             if originalName in replacements:
                 svgName = replacements[originalName]
@@ -332,19 +334,19 @@ def adjustSVGAttributes(self, token):
 
     def adjustForeignAttributes(self, token):
         replacements = {
-            "xlink:actuate":("xlink", "actuate", namespaces["xlink"]),
-            "xlink:arcrole":("xlink", "arcrole", namespaces["xlink"]),
-            "xlink:href":("xlink", "href", namespaces["xlink"]),
-            "xlink:role":("xlink", "role", namespaces["xlink"]),
-            "xlink:show":("xlink", "show", namespaces["xlink"]),
-            "xlink:title":("xlink", "title", namespaces["xlink"]),
-            "xlink:type":("xlink", "type", namespaces["xlink"]),
-            "xml:base":("xml", "base", namespaces["xml"]),
-            "xml:lang":("xml", "lang", namespaces["xml"]),
-            "xml:space":("xml", "space", namespaces["xml"]),
-            "xmlns":(None, "xmlns", namespaces["xmlns"]),
-            "xmlns:xlink":("xmlns", "xlink", namespaces["xmlns"])
-            }
+            "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
+            "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
+            "xlink:href": ("xlink", "href", namespaces["xlink"]),
+            "xlink:role": ("xlink", "role", namespaces["xlink"]),
+            "xlink:show": ("xlink", "show", namespaces["xlink"]),
+            "xlink:title": ("xlink", "title", namespaces["xlink"]),
+            "xlink:type": ("xlink", "type", namespaces["xlink"]),
+            "xml:base": ("xml", "base", namespaces["xml"]),
+            "xml:lang": ("xml", "lang", namespaces["xml"]),
+            "xml:space": ("xml", "space", namespaces["xml"]),
+            "xmlns": (None, "xmlns", namespaces["xmlns"]),
+            "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
+        }
 
         for originalName in token["data"].keys():
             if originalName in replacements:
@@ -360,20 +362,20 @@ def resetInsertionMode(self):
         # specification.)
         last = False
         newModes = {
-            "select":"inSelect",
-            "td":"inCell",
-            "th":"inCell",
-            "tr":"inRow",
-            "tbody":"inTableBody",
-            "thead":"inTableBody",
-            "tfoot":"inTableBody",
-            "caption":"inCaption",
-            "colgroup":"inColumnGroup",
-            "table":"inTable",
-            "head":"inBody",
-            "body":"inBody",
-            "frameset":"inFrameset",
-            "html":"beforeHead"
+            "select": "inSelect",
+            "td": "inCell",
+            "th": "inCell",
+            "tr": "inRow",
+            "tbody": "inTableBody",
+            "thead": "inTableBody",
+            "tfoot": "inTableBody",
+            "caption": "inCaption",
+            "colgroup": "inColumnGroup",
+            "table": "inTable",
+            "head": "inBody",
+            "body": "inBody",
+            "frameset": "inFrameset",
+            "html": "beforeHead"
         }
         for node in self.tree.openElements[::-1]:
             nodeName = node.name
@@ -405,7 +407,7 @@ def parseRCDataRawtext(self, token, contentType):
         """
         assert contentType in ("RAWTEXT", "RCDATA")
 
-        element = self.tree.insertElement(token)
+        self.tree.insertElement(token)
 
         if contentType == "RAWTEXT":
             self.tokenizer.state = self.tokenizer.rawtextState
@@ -416,16 +418,18 @@ def parseRCDataRawtext(self, token, contentType):
 
         self.phase = self.phases["text"]
 
+
 def getPhases(debug):
     def log(function):
         """Logger that records which phase processes each token"""
         type_names = dict((value, key) for key, value in
                           constants.tokenTypes.items())
+
         def wrapped(self, *args, **kwargs):
             if function.__name__.startswith("process") and len(args) > 0:
                 token = args[0]
                 try:
-                    info = {"type":type_names[token['type']]}
+                    info = {"type": type_names[token['type']]}
                 except:
                     raise
                 if token['type'] in constants.tagTokenTypes:
@@ -476,8 +480,8 @@ def processStartTag(self, token):
             return self.startTagHandler[token["name"]](token)
 
         def startTagHtml(self, token):
-            if self.parser.firstStartTag == False and token["name"] == "html":
-               self.parser.parseError("non-html-root")
+            if not self.parser.firstStartTag and token["name"] == "html":
+                self.parser.parseError("non-html-root")
             # XXX Need a check here to see if the first start tag token emitted is
             # this token... If it's not, invoke self.parser.parseError().
             for attr, value in token["data"].items():
@@ -501,8 +505,8 @@ def processDoctype(self, token):
             systemId = token["systemId"]
             correct = token["correct"]
 
-            if (name != "html" or publicId != None or
-                systemId != None and systemId != "about:legacy-compat"):
+            if (name != "html" or publicId is not None or
+                    systemId is not None and systemId != "about:legacy-compat"):
                 self.parser.parseError("unknown-doctype")
 
             if publicId is None:
@@ -577,8 +581,8 @@ def processDoctype(self, token):
                 or publicId.startswith(
                     ("-//w3c//dtd html 4.01 frameset//",
                      "-//w3c//dtd html 4.01 transitional//")) and
-                    systemId == None
-                or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
+                    systemId is None
+                    or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
                 self.parser.compatMode = "quirks"
             elif (publicId.startswith(
                     ("-//w3c//dtd xhtml 1.0 frameset//",
@@ -586,7 +590,7 @@ def processDoctype(self, token):
                   or publicId.startswith(
                       ("-//w3c//dtd html 4.01 frameset//",
                        "-//w3c//dtd html 4.01 transitional//")) and
-                      systemId != None):
+                  systemId is not None):
                 self.parser.compatMode = "limited quirks"
 
             self.parser.phase = self.parser.phases["beforeHtml"]
@@ -602,13 +606,13 @@ def processCharacters(self, token):
 
         def processStartTag(self, token):
             self.parser.parseError("expected-doctype-but-got-start-tag",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.anythingElse()
             return token
 
         def processEndTag(self, token):
             self.parser.parseError("expected-doctype-but-got-end-tag",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.anythingElse()
             return token
 
@@ -617,7 +621,6 @@ def processEOF(self):
             self.anythingElse()
             return True
 
-
     class BeforeHtmlPhase(Phase):
         # helper methods
         def insertHtmlElement(self):
@@ -648,12 +651,11 @@ def processStartTag(self, token):
         def processEndTag(self, token):
             if token["name"] not in ("head", "body", "html", "br"):
                 self.parser.parseError("unexpected-end-tag-before-html",
-                  {"name": token["name"]})
+                                       {"name": token["name"]})
             else:
                 self.insertHtmlElement()
                 return token
 
-
     class BeforeHeadPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
@@ -698,13 +700,13 @@ def endTagImplyHead(self, token):
 
         def endTagOther(self, token):
             self.parser.parseError("end-tag-after-implied-root",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
 
     class InHeadPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-            self.startTagHandler =  utils.MethodDispatcher([
+            self.startTagHandler = utils.MethodDispatcher([
                 ("html", self.startTagHtml),
                 ("title", self.startTagTitle),
                 (("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle),
@@ -723,7 +725,7 @@ def __init__(self, parser, tree):
             self.endTagHandler.default = self.endTagOther
 
         # the real thing
-        def processEOF (self):
+        def processEOF(self):
             self.anythingElse()
             return True
 
@@ -767,7 +769,7 @@ def startTagTitle(self, token):
             self.parser.parseRCDataRawtext(token, "RCDATA")
 
         def startTagNoScriptNoFramesStyle(self, token):
-            #Need to decide whether to implement the scripting-disabled case
+            # Need to decide whether to implement the scripting-disabled case
             self.parser.parseRCDataRawtext(token, "RAWTEXT")
 
         def startTagScript(self, token):
@@ -782,7 +784,7 @@ def startTagOther(self, token):
 
         def endTagHead(self, token):
             node = self.parser.tree.openElements.pop()
-            assert node.name == "head", "Expected head got %s"%node.name
+            assert node.name == "head", "Expected head got %s" % node.name
             self.parser.phase = self.parser.phases["afterHead"]
 
         def endTagHtmlBodyBr(self, token):
@@ -795,12 +797,10 @@ def endTagOther(self, token):
         def anythingElse(self):
             self.endTagHead(impliedTagToken("head"))
 
-
     # XXX If we implement a parser for which scripting is disabled we need to
     # implement this phase.
     #
     # class InHeadNoScriptPhase(Phase):
-
     class AfterHeadPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
@@ -811,7 +811,7 @@ def __init__(self, parser, tree):
                 ("frameset", self.startTagFrameset),
                 (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
                   "style", "title"),
-                  self.startTagFromHead),
+                 self.startTagFromHead),
                 ("head", self.startTagHead)
             ])
             self.startTagHandler.default = self.startTagOther
@@ -841,7 +841,7 @@ def startTagFrameset(self, token):
 
         def startTagFromHead(self, token):
             self.parser.parseError("unexpected-start-tag-out-of-my-head",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.tree.openElements.append(self.tree.headPointer)
             self.parser.phases["inHead"].processStartTag(token)
             for node in self.tree.openElements[::-1]:
@@ -850,7 +850,7 @@ def startTagFromHead(self, token):
                     break
 
         def startTagHead(self, token):
-            self.parser.parseError("unexpected-start-tag", {"name":token["name"]})
+            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
 
         def startTagOther(self, token):
             self.anythingElse()
@@ -861,21 +861,20 @@ def endTagHtmlBodyBr(self, token):
             return token
 
         def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name":token["name"]})
+            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
 
         def anythingElse(self):
             self.tree.insertElement(impliedTagToken("body", "StartTag"))
             self.parser.phase = self.parser.phases["inBody"]
             self.parser.framesetOK = True
 
-
     class InBodyPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
         # the really-really-really-very crazy mode
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-            #Keep a ref to this for special handling of whitespace in <pre>
+            # Keep a ref to this for special handling of whitespace in <pre>
             self.processSpaceCharactersNonPre = self.processSpaceCharacters
 
             self.startTagHandler = utils.MethodDispatcher([
@@ -889,15 +888,15 @@ def __init__(self, parser, tree):
                   "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
                   "footer", "header", "hgroup", "menu", "nav", "ol", "p",
                   "section", "summary", "ul"),
-                  self.startTagCloseP),
+                 self.startTagCloseP),
                 (headingElements, self.startTagHeading),
                 (("pre", "listing"), self.startTagPreListing),
                 ("form", self.startTagForm),
                 (("li", "dd", "dt"), self.startTagListItem),
-                ("plaintext",self.startTagPlaintext),
+                ("plaintext", self.startTagPlaintext),
                 ("a", self.startTagA),
                 (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
-                  "strong", "tt", "u"),self.startTagFormatting),
+                  "strong", "tt", "u"), self.startTagFormatting),
                 ("nobr", self.startTagNobr),
                 ("button", self.startTagButton),
                 (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
@@ -925,21 +924,21 @@ def __init__(self, parser, tree):
             self.startTagHandler.default = self.startTagOther
 
             self.endTagHandler = utils.MethodDispatcher([
-                ("body",self.endTagBody),
-                ("html",self.endTagHtml),
+                ("body", self.endTagBody),
+                ("html", self.endTagHtml),
                 (("address", "article", "aside", "blockquote", "center",
                   "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
                   "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre",
                   "section", "summary", "ul"), self.endTagBlock),
                 ("form", self.endTagForm),
-                ("p",self.endTagP),
+                ("p", self.endTagP),
                 (("dd", "dt", "li"), self.endTagListItem),
                 (headingElements, self.endTagHeading),
                 (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
                   "strike", "strong", "tt", "u"), self.endTagFormatting),
-                (("applet",  "marquee", "object"), self.endTagAppletMarqueeObject),
+                (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
                 ("br", self.endTagBr),
-                ])
+            ])
             self.endTagHandler.default = self.endTagOther
 
         def isMatchingFormattingElement(self, node1, node2):
@@ -981,7 +980,7 @@ def processEOF(self):
                 if node.name not in allowed_elements:
                     self.parser.parseError("expected-closing-tag-but-got-eof")
                     break
-            #Stop parsing
+            # Stop parsing
 
         def processSpaceCharactersDropNewline(self, token):
             # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
@@ -990,7 +989,7 @@ def processSpaceCharactersDropNewline(self, token):
             self.processSpaceCharacters = self.processSpaceCharactersNonPre
             if (data.startswith("\n") and
                 self.tree.openElements[-1].name in ("pre", "listing", "textarea")
-                and not self.tree.openElements[-1].hasContent()):
+                    and not self.tree.openElements[-1].hasContent()):
                 data = data[1:]
             if data:
                 self.tree.reconstructActiveFormattingElements()
@@ -998,11 +997,11 @@ def processSpaceCharactersDropNewline(self, token):
 
         def processCharacters(self, token):
             if token["data"] == "\u0000":
-                #The tokenizer should always emit null on its own
+                # The tokenizer should always emit null on its own
                 return
             self.tree.reconstructActiveFormattingElements()
             self.tree.insertText(token["data"])
-            #This must be bad for performance
+            # This must be bad for performance
             if (self.parser.framesetOK and
                 any([char not in spaceCharacters
                      for char in token["data"]])):
@@ -1018,7 +1017,7 @@ def startTagProcessInHead(self, token):
         def startTagBody(self, token):
             self.parser.parseError("unexpected-start-tag", {"name": "body"})
             if (len(self.tree.openElements) == 1
-                or self.tree.openElements[1].name != "body"):
+                    or self.tree.openElements[1].name != "body"):
                 assert self.parser.innerHTML
             else:
                 self.parser.framesetOK = False
@@ -1064,9 +1063,9 @@ def startTagForm(self, token):
         def startTagListItem(self, token):
             self.parser.framesetOK = False
 
-            stopNamesMap = {"li":["li"],
-                            "dt":["dt", "dd"],
-                            "dd":["dt", "dd"]}
+            stopNamesMap = {"li": ["li"],
+                            "dt": ["dt", "dd"],
+                            "dd": ["dt", "dd"]}
             stopNames = stopNamesMap[token["name"]]
             for node in reversed(self.tree.openElements):
                 if node.name in stopNames:
@@ -1074,7 +1073,7 @@ def startTagListItem(self, token):
                         impliedTagToken(node.name, "EndTag"))
                     break
                 if (node.nameTuple in specialElements and
-                    node.name not in ("address", "div", "p")):
+                        node.name not in ("address", "div", "p")):
                     break
 
             if self.tree.elementInScope("p", variant="button"):
@@ -1101,7 +1100,7 @@ def startTagA(self, token):
             afeAElement = self.tree.elementInActiveFormattingElements("a")
             if afeAElement:
                 self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                  {"startName": "a", "endName": "a"})
+                                       {"startName": "a", "endName": "a"})
                 self.endTagFormatting(impliedTagToken("a"))
                 if afeAElement in self.tree.openElements:
                     self.tree.openElements.remove(afeAElement)
@@ -1118,7 +1117,7 @@ def startTagNobr(self, token):
             self.tree.reconstructActiveFormattingElements()
             if self.tree.elementInScope("nobr"):
                 self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                  {"startName": "nobr", "endName": "nobr"})
+                                       {"startName": "nobr", "endName": "nobr"})
                 self.processEndTag(impliedTagToken("nobr"))
                 # XXX Need tests that trigger the following
                 self.tree.reconstructActiveFormattingElements()
@@ -1127,7 +1126,7 @@ def startTagNobr(self, token):
         def startTagButton(self, token):
             if self.tree.elementInScope("button"):
                 self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                  {"startName": "button", "endName": "button"})
+                                       {"startName": "button", "endName": "button"})
                 self.processEndTag(impliedTagToken("button"))
                 return token
             else:
@@ -1167,8 +1166,8 @@ def startTagInput(self, token):
             framesetOK = self.parser.framesetOK
             self.startTagVoidFormatting(token)
             if ("type" in token["data"] and
-                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
-                #input type=hidden doesn't change framesetOK
+                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+                # input type=hidden doesn't change framesetOK
                 self.parser.framesetOK = framesetOK
 
         def startTagParamSource(self, token):
@@ -1187,7 +1186,7 @@ def startTagHr(self, token):
         def startTagImage(self, token):
             # No really...
             self.parser.parseError("unexpected-start-tag-treated-as",
-              {"originalName": "image", "newName": "img"})
+                                   {"originalName": "image", "newName": "img"})
             self.processStartTag(impliedTagToken("img", "StartTag",
                                                  attributes=token["data"],
                                                  selfClosing=token["selfClosing"]))
@@ -1209,7 +1208,7 @@ def startTagIsIndex(self, token):
             else:
                 prompt = "This is a searchable index. Enter search keywords: "
             self.processCharacters(
-                {"type":tokenTypes["Characters"], "data":prompt})
+                {"type": tokenTypes["Characters"], "data": prompt})
             attributes = token["data"].copy()
             if "action" in attributes:
                 del attributes["action"]
@@ -1217,8 +1216,8 @@ def startTagIsIndex(self, token):
                 del attributes["prompt"]
             attributes["name"] = "isindex"
             self.processStartTag(impliedTagToken("input", "StartTag",
-                                                 attributes = attributes,
-                                                 selfClosing =
+                                                 attributes=attributes,
+                                                 selfClosing=
                                                  token["selfClosing"]))
             self.processEndTag(impliedTagToken("label"))
             self.processStartTag(impliedTagToken("hr", "StartTag"))
@@ -1271,8 +1270,8 @@ def startTagMath(self, token):
             self.parser.adjustForeignAttributes(token)
             token["namespace"] = namespaces["mathml"]
             self.tree.insertElement(token)
-            #Need to get the parse error right for the case where the token
-            #has a namespace not equal to the xmlns attribute
+            # Need to get the parse error right for the case where the token
+            # has a namespace not equal to the xmlns attribute
             if token["selfClosing"]:
                 self.tree.openElements.pop()
                 token["selfClosingAcknowledged"] = True
@@ -1283,8 +1282,8 @@ def startTagSvg(self, token):
             self.parser.adjustForeignAttributes(token)
             token["namespace"] = namespaces["svg"]
             self.tree.insertElement(token)
-            #Need to get the parse error right for the case where the token
-            #has a namespace not equal to the xmlns attribute
+            # Need to get the parse error right for the case where the token
+            # has a namespace not equal to the xmlns attribute
             if token["selfClosing"]:
                 self.tree.openElements.pop()
                 token["selfClosingAcknowledged"] = True
@@ -1326,7 +1325,7 @@ def endTagBody(self, token):
                                                    "tbody", "td", "tfoot",
                                                    "th", "thead", "tr", "body",
                                                    "html")):
-                        #Not sure this is the correct name for the parse error
+                        # Not sure this is the correct name for the parse error
                         self.parser.parseError(
                             "expected-one-end-tag-but-got-another",
                             {"expectedName": "body", "gotName": node.name})
@@ -1334,20 +1333,20 @@ def endTagBody(self, token):
             self.parser.phase = self.parser.phases["afterBody"]
 
         def endTagHtml(self, token):
-            #We repeat the test for the body end tag token being ignored here
+            # We repeat the test for the body end tag token being ignored here
             if self.tree.elementInScope("body"):
                 self.endTagBody(impliedTagToken("body"))
                 return token
 
         def endTagBlock(self, token):
-            #Put us back in the right whitespace handling mode
+            # Put us back in the right whitespace handling mode
             if token["name"] == "pre":
                 self.processSpaceCharacters = self.processSpaceCharactersNonPre
             inScope = self.tree.elementInScope(token["name"])
             if inScope:
                 self.tree.generateImpliedEndTags()
             if self.tree.openElements[-1].name != token["name"]:
-                 self.parser.parseError("end-tag-too-early", {"name": token["name"]})
+                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
             if inScope:
                 node = self.tree.openElements.pop()
                 while node.name != token["name"]:
@@ -1358,7 +1357,7 @@ def endTagForm(self, token):
             self.tree.formPointer = None
             if node is None or not self.tree.elementInScope(node):
                 self.parser.parseError("unexpected-end-tag",
-                                       {"name":"form"})
+                                       {"name": "form"})
             else:
                 self.tree.generateImpliedEndTags()
                 if self.tree.openElements[-1] != node:
@@ -1374,7 +1373,7 @@ def endTagListItem(self, token):
             if not self.tree.elementInScope(token["name"], variant=variant):
                 self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
             else:
-                self.tree.generateImpliedEndTags(exclude = token["name"])
+                self.tree.generateImpliedEndTags(exclude=token["name"])
                 if self.tree.openElements[-1].name != token["name"]:
                     self.parser.parseError(
                         "end-tag-too-early",
@@ -1402,7 +1401,6 @@ def endTagFormatting(self, token):
             """The much-feared adoption agency algorithm"""
             # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
             # XXX Better parseError messages appreciated.
-            name = token["name"]
 
             # Step 1
             outerLoopCounter = 0
@@ -1441,7 +1439,6 @@ def endTagFormatting(self, token):
                     self.tree.activeFormattingElements.remove(formattingElement)
                     return
 
-
                 # Otherwise, if there is such a node, and that node is
                 # also in the stack of open elements, but the element
                 # is not in scope, then this is a parse error; ignore
@@ -1474,10 +1471,10 @@ def endTagFormatting(self, token):
                         element = self.tree.openElements.pop()
                     self.tree.activeFormattingElements.remove(element)
                     return
-                commonAncestor = self.tree.openElements[afeIndex-1]
+                commonAncestor = self.tree.openElements[afeIndex - 1]
 
                 # Step 5
-                #if furthestBlock.parent:
+                # if furthestBlock.parent:
                 #    furthestBlock.parent.removeChild(furthestBlock)
 
                 # Step 5
@@ -1508,7 +1505,7 @@ def endTagFormatting(self, token):
                         bookmark = (self.tree.activeFormattingElements.index(node)
                                     + 1)
                     # Step 6.5
-                    #cite = node.parent
+                    # cite = node.parent
                     clone = node.cloneNode()
                     # Replace node with clone
                     self.tree.activeFormattingElements[
@@ -1555,7 +1552,7 @@ def endTagFormatting(self, token):
                 # Step 12
                 self.tree.openElements.remove(formattingElement)
                 self.tree.openElements.insert(
-                  self.tree.openElements.index(furthestBlock) + 1, clone)
+                    self.tree.openElements.index(furthestBlock) + 1, clone)
 
         def endTagAppletMarqueeObject(self, token):
             if self.tree.elementInScope(token["name"]):
@@ -1571,7 +1568,7 @@ def endTagAppletMarqueeObject(self, token):
 
         def endTagBr(self, token):
             self.parser.parseError("unexpected-end-tag-treated-as",
-              {"originalName": "br", "newName": "br element"})
+                                   {"originalName": "br", "newName": "br element"})
             self.tree.reconstructActiveFormattingElements()
             self.tree.insertElement(impliedTagToken("br", "StartTag"))
             self.tree.openElements.pop()
@@ -1596,7 +1593,7 @@ def __init__(self, parser, tree):
             self.startTagHandler = utils.MethodDispatcher([])
             self.startTagHandler.default = self.startTagOther
             self.endTagHandler = utils.MethodDispatcher([
-                    ("script", self.endTagScript)])
+                ("script", self.endTagScript)])
             self.endTagHandler.default = self.endTagOther
 
         def processCharacters(self, token):
@@ -1604,23 +1601,23 @@ def processCharacters(self, token):
 
         def processEOF(self):
             self.parser.parseError("expected-named-closing-tag-but-got-eof",
-                                   {"name":self.tree.openElements[-1].name})
+                                   {"name": self.tree.openElements[-1].name})
             self.tree.openElements.pop()
             self.parser.phase = self.parser.originalPhase
             return True
 
         def startTagOther(self, token):
-            assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode"%token['name']
+            assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']
 
         def endTagScript(self, token):
             node = self.tree.openElements.pop()
             assert node.name == "script"
             self.parser.phase = self.parser.originalPhase
-            #The rest of this method is all stuff that only happens if
-            #document.write works
+            # The rest of this method is all stuff that only happens if
+            # document.write works
 
         def endTagOther(self, token):
-            node = self.tree.openElements.pop()
+            self.tree.openElements.pop()
             self.parser.phase = self.parser.originalPhase
 
     class InTablePhase(Phase):
@@ -1652,7 +1649,7 @@ def __init__(self, parser, tree):
         def clearStackToTableContext(self):
             # "clear the stack back to a table context"
             while self.tree.openElements[-1].name not in ("table", "html"):
-                #self.parser.parseError("unexpected-implied-end-tag-in-table",
+                # self.parser.parseError("unexpected-implied-end-tag-in-table",
                 #  {"name":  self.tree.openElements[-1].name})
                 self.tree.openElements.pop()
             # When the current node is <html> it's an innerHTML case
@@ -1663,7 +1660,7 @@ def processEOF(self):
                 self.parser.parseError("eof-in-table")
             else:
                 assert self.parser.innerHTML
-            #Stop parsing
+            # Stop parsing
 
         def processSpaceCharacters(self, token):
             originalPhase = self.parser.phase
@@ -1678,7 +1675,7 @@ def processCharacters(self, token):
             self.parser.phase.processCharacters(token)
 
         def insertText(self, token):
-            #If we get here there must be at least one non-whitespace character
+            # If we get here there must be at least one non-whitespace character
             # Do the table magic!
             self.tree.insertFromTable = True
             self.parser.phases["inBody"].processCharacters(token)
@@ -1710,7 +1707,7 @@ def startTagImplyTbody(self, token):
 
         def startTagTable(self, token):
             self.parser.parseError("unexpected-start-tag-implies-end-tag",
-              {"startName": "table", "endName": "table"})
+                                   {"startName": "table", "endName": "table"})
             self.parser.phase.processEndTag(impliedTagToken("table"))
             if not self.parser.innerHTML:
                 return token
@@ -1720,7 +1717,7 @@ def startTagStyleScript(self, token):
 
         def startTagInput(self, token):
             if ("type" in token["data"] and
-                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
                 self.parser.parseError("unexpected-hidden-input-in-table")
                 self.tree.insertElement(token)
                 # XXX associate with form
@@ -1747,8 +1744,8 @@ def endTagTable(self, token):
                 self.tree.generateImpliedEndTags()
                 if self.tree.openElements[-1].name != "table":
                     self.parser.parseError("end-tag-too-early-named",
-                      {"gotName": "table",
-                       "expectedName": self.tree.openElements[-1].name})
+                                           {"gotName": "table",
+                                            "expectedName": self.tree.openElements[-1].name})
                 while self.tree.openElements[-1].name != "table":
                     self.tree.openElements.pop()
                 self.tree.openElements.pop()
@@ -1777,7 +1774,7 @@ def __init__(self, parser, tree):
         def flushCharacters(self):
             data = "".join([item["data"] for item in self.characterTokens])
             if any([item not in spaceCharacters for item in data]):
-                token = {"type":tokenTypes["Characters"], "data":data}
+                token = {"type": tokenTypes["Characters"], "data": data}
                 self.parser.phases["inTable"].insertText(token)
             elif data:
                 self.tree.insertText(data)
@@ -1799,7 +1796,7 @@ def processCharacters(self, token):
             self.characterTokens.append(token)
 
         def processSpaceCharacters(self, token):
-            #pretty sure we should never reach here
+            # pretty sure we should never reach here
             self.characterTokens.append(token)
     #        assert False
 
@@ -1813,7 +1810,6 @@ def processEndTag(self, token):
             self.parser.phase = self.originalPhase
             return token
 
-
     class InCaptionPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
         def __init__(self, parser, tree):
@@ -1845,7 +1841,7 @@ def processCharacters(self, token):
 
         def startTagTableElement(self, token):
             self.parser.parseError()
-            #XXX Have to duplicate logic here to find out if the tag is ignored
+            # XXX Have to duplicate logic here to find out if the tag is ignored
             ignoreEndTag = self.ignoreEndTagCaption()
             self.parser.phase.processEndTag(impliedTagToken("caption"))
             if not ignoreEndTag:
@@ -1860,8 +1856,8 @@ def endTagCaption(self, token):
                 self.tree.generateImpliedEndTags()
                 if self.tree.openElements[-1].name != "caption":
                     self.parser.parseError("expected-one-end-tag-but-got-another",
-                      {"gotName": "caption",
-                       "expectedName": self.tree.openElements[-1].name})
+                                           {"gotName": "caption",
+                                            "expectedName": self.tree.openElements[-1].name})
                 while self.tree.openElements[-1].name != "caption":
                     self.tree.openElements.pop()
                 self.tree.openElements.pop()
@@ -1885,7 +1881,6 @@ def endTagIgnore(self, token):
         def endTagOther(self, token):
             return self.parser.phases["inBody"].processEndTag(token)
 
-
     class InColumnGroupPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-column
 
@@ -1951,7 +1946,6 @@ def endTagOther(self, token):
             if not ignoreEndTag:
                 return token
 
-
     class InTableBodyPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
         def __init__(self, parser, tree):
@@ -1976,8 +1970,8 @@ def __init__(self, parser, tree):
         # helper methods
         def clearStackToTableBodyContext(self):
             while self.tree.openElements[-1].name not in ("tbody", "tfoot",
-              "thead", "html"):
-                #self.parser.parseError("unexpected-implied-end-tag-in-table",
+                                                          "thead", "html"):
+                # self.parser.parseError("unexpected-implied-end-tag-in-table",
                 #  {"name": self.tree.openElements[-1].name})
                 self.tree.openElements.pop()
             if self.tree.openElements[-1].name == "html":
@@ -2008,7 +2002,7 @@ def startTagTableOther(self, token):
             # XXX AT Any ideas on how to share this with endTagTable?
             if (self.tree.elementInScope("tbody", variant="table") or
                 self.tree.elementInScope("thead", variant="table") or
-                self.tree.elementInScope("tfoot", variant="table")):
+                    self.tree.elementInScope("tfoot", variant="table")):
                 self.clearStackToTableBodyContext()
                 self.endTagTableRowGroup(
                     impliedTagToken(self.tree.openElements[-1].name))
@@ -2028,12 +2022,12 @@ def endTagTableRowGroup(self, token):
                 self.parser.phase = self.parser.phases["inTable"]
             else:
                 self.parser.parseError("unexpected-end-tag-in-table-body",
-                  {"name": token["name"]})
+                                       {"name": token["name"]})
 
         def endTagTable(self, token):
             if (self.tree.elementInScope("tbody", variant="table") or
                 self.tree.elementInScope("thead", variant="table") or
-                self.tree.elementInScope("tfoot", variant="table")):
+                    self.tree.elementInScope("tfoot", variant="table")):
                 self.clearStackToTableBodyContext()
                 self.endTagTableRowGroup(
                     impliedTagToken(self.tree.openElements[-1].name))
@@ -2045,12 +2039,11 @@ def endTagTable(self, token):
 
         def endTagIgnore(self, token):
             self.parser.parseError("unexpected-end-tag-in-table-body",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
 
         def endTagOther(self, token):
             return self.parser.phases["inTable"].processEndTag(token)
 
-
     class InRowPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-row
         def __init__(self, parser, tree):
@@ -2068,7 +2061,7 @@ def __init__(self, parser, tree):
                 ("table", self.endTagTable),
                 (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
                 (("body", "caption", "col", "colgroup", "html", "td", "th"),
-                  self.endTagIgnore)
+                 self.endTagIgnore)
             ])
             self.endTagHandler.default = self.endTagOther
 
@@ -2076,7 +2069,7 @@ def __init__(self, parser, tree):
         def clearStackToTableRowContext(self):
             while self.tree.openElements[-1].name not in ("tr", "html"):
                 self.parser.parseError("unexpected-implied-end-tag-in-table-row",
-                  {"name": self.tree.openElements[-1].name})
+                                       {"name": self.tree.openElements[-1].name})
                 self.tree.openElements.pop()
 
         def ignoreEndTagTr(self):
@@ -2135,7 +2128,7 @@ def endTagTableRowGroup(self, token):
 
         def endTagIgnore(self, token):
             self.parser.parseError("unexpected-end-tag-in-table-row",
-                {"name": token["name"]})
+                                   {"name": token["name"]})
 
         def endTagOther(self, token):
             return self.parser.phases["inTable"].processEndTag(token)
@@ -2174,7 +2167,7 @@ def processCharacters(self, token):
 
         def startTagTableOther(self, token):
             if (self.tree.elementInScope("td", variant="table") or
-                self.tree.elementInScope("th", variant="table")):
+                    self.tree.elementInScope("th", variant="table")):
                 self.closeCell()
                 return token
             else:
@@ -2190,7 +2183,7 @@ def endTagTableCell(self, token):
                 self.tree.generateImpliedEndTags(token["name"])
                 if self.tree.openElements[-1].name != token["name"]:
                     self.parser.parseError("unexpected-cell-end-tag",
-                      {"name": token["name"]})
+                                           {"name": token["name"]})
                     while True:
                         node = self.tree.openElements.pop()
                         if node.name == token["name"]:
@@ -2279,19 +2272,19 @@ def startTagScript(self, token):
 
         def startTagOther(self, token):
             self.parser.parseError("unexpected-start-tag-in-select",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
 
         def endTagOption(self, token):
             if self.tree.openElements[-1].name == "option":
                 self.tree.openElements.pop()
             else:
                 self.parser.parseError("unexpected-end-tag-in-select",
-                  {"name": "option"})
+                                       {"name": "option"})
 
         def endTagOptgroup(self, token):
             # </optgroup> implicitly closes <option>
             if (self.tree.openElements[-1].name == "option" and
-                self.tree.openElements[-2].name == "optgroup"):
+                    self.tree.openElements[-2].name == "optgroup"):
                 self.tree.openElements.pop()
             # It also closes </optgroup>
             if self.tree.openElements[-1].name == "optgroup":
@@ -2299,7 +2292,7 @@ def endTagOptgroup(self, token):
             # But nothing else
             else:
                 self.parser.parseError("unexpected-end-tag-in-select",
-                  {"name": "optgroup"})
+                                       {"name": "optgroup"})
 
         def endTagSelect(self, token):
             if self.tree.elementInScope("select", variant="select"):
@@ -2314,8 +2307,7 @@ def endTagSelect(self, token):
 
         def endTagOther(self, token):
             self.parser.parseError("unexpected-end-tag-in-select",
-              {"name": token["name"]})
-
+                                   {"name": token["name"]})
 
     class InSelectInTablePhase(Phase):
         def __init__(self, parser, tree):
@@ -2356,56 +2348,56 @@ def endTagTable(self, token):
         def endTagOther(self, token):
             return self.parser.phases["inSelect"].processEndTag(token)
 
-
     class InForeignContentPhase(Phase):
         breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                       "center", "code", "dd", "div", "dl", "dt",
                                       "em", "embed", "h1", "h2", "h3",
                                       "h4", "h5", "h6", "head", "hr", "i", "img",
                                       "li", "listing", "menu", "meta", "nobr",
-                                      "ol", "p", "pre", "ruby", "s",  "small",
-                                      "span", "strong", "strike",  "sub", "sup",
+                                      "ol", "p", "pre", "ruby", "s", "small",
+                                      "span", "strong", "strike", "sub", "sup",
                                       "table", "tt", "u", "ul", "var"])
+
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
         def adjustSVGTagNames(self, token):
-            replacements = {"altglyph":"altGlyph",
-                            "altglyphdef":"altGlyphDef",
-                            "altglyphitem":"altGlyphItem",
-                            "animatecolor":"animateColor",
-                            "animatemotion":"animateMotion",
-                            "animatetransform":"animateTransform",
-                            "clippath":"clipPath",
-                            "feblend":"feBlend",
-                            "fecolormatrix":"feColorMatrix",
-                            "fecomponenttransfer":"feComponentTransfer",
-                            "fecomposite":"feComposite",
-                            "feconvolvematrix":"feConvolveMatrix",
-                            "fediffuselighting":"feDiffuseLighting",
-                            "fedisplacementmap":"feDisplacementMap",
-                            "fedistantlight":"feDistantLight",
-                            "feflood":"feFlood",
-                            "fefunca":"feFuncA",
-                            "fefuncb":"feFuncB",
-                            "fefuncg":"feFuncG",
-                            "fefuncr":"feFuncR",
-                            "fegaussianblur":"feGaussianBlur",
-                            "feimage":"feImage",
-                            "femerge":"feMerge",
-                            "femergenode":"feMergeNode",
-                            "femorphology":"feMorphology",
-                            "feoffset":"feOffset",
-                            "fepointlight":"fePointLight",
-                            "fespecularlighting":"feSpecularLighting",
-                            "fespotlight":"feSpotLight",
-                            "fetile":"feTile",
-                            "feturbulence":"feTurbulence",
-                            "foreignobject":"foreignObject",
-                            "glyphref":"glyphRef",
-                            "lineargradient":"linearGradient",
-                            "radialgradient":"radialGradient",
-                            "textpath":"textPath"}
+            replacements = {"altglyph": "altGlyph",
+                            "altglyphdef": "altGlyphDef",
+                            "altglyphitem": "altGlyphItem",
+                            "animatecolor": "animateColor",
+                            "animatemotion": "animateMotion",
+                            "animatetransform": "animateTransform",
+                            "clippath": "clipPath",
+                            "feblend": "feBlend",
+                            "fecolormatrix": "feColorMatrix",
+                            "fecomponenttransfer": "feComponentTransfer",
+                            "fecomposite": "feComposite",
+                            "feconvolvematrix": "feConvolveMatrix",
+                            "fediffuselighting": "feDiffuseLighting",
+                            "fedisplacementmap": "feDisplacementMap",
+                            "fedistantlight": "feDistantLight",
+                            "feflood": "feFlood",
+                            "fefunca": "feFuncA",
+                            "fefuncb": "feFuncB",
+                            "fefuncg": "feFuncG",
+                            "fefuncr": "feFuncR",
+                            "fegaussianblur": "feGaussianBlur",
+                            "feimage": "feImage",
+                            "femerge": "feMerge",
+                            "femergenode": "feMergeNode",
+                            "femorphology": "feMorphology",
+                            "feoffset": "feOffset",
+                            "fepointlight": "fePointLight",
+                            "fespecularlighting": "feSpecularLighting",
+                            "fespotlight": "feSpotLight",
+                            "fetile": "feTile",
+                            "feturbulence": "feTurbulence",
+                            "foreignobject": "foreignObject",
+                            "glyphref": "glyphRef",
+                            "lineargradient": "linearGradient",
+                            "radialgradient": "radialGradient",
+                            "textpath": "textPath"}
 
             if token["name"] in replacements:
                 token["name"] = replacements[token["name"]]
@@ -2453,7 +2445,7 @@ def processEndTag(self, token):
 
             while True:
                 if node.name.translate(asciiUpper2Lower) == token["name"]:
-                    #XXX this isn't in the spec but it seems necessary
+                    # XXX this isn't in the spec but it seems necessary
                     if self.parser.phase == self.parser.phases["inTableText"]:
                         self.parser.phase.flushCharacters()
                         self.parser.phase = self.parser.phase.originalPhase
@@ -2471,21 +2463,20 @@ def processEndTag(self, token):
                     break
             return new_token
 
-
     class AfterBodyPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
             self.startTagHandler = utils.MethodDispatcher([
-                    ("html", self.startTagHtml)
-                    ])
+                ("html", self.startTagHtml)
+            ])
             self.startTagHandler.default = self.startTagOther
 
             self.endTagHandler = utils.MethodDispatcher([("html", self.endTagHtml)])
             self.endTagHandler.default = self.endTagOther
 
         def processEOF(self):
-            #Stop parsing
+            # Stop parsing
             pass
 
         def processComment(self, token):
@@ -2503,11 +2494,11 @@ def startTagHtml(self, token):
 
         def startTagOther(self, token):
             self.parser.parseError("unexpected-start-tag-after-body",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.parser.phase = self.parser.phases["inBody"]
             return token
 
-        def endTagHtml(self,name):
+        def endTagHtml(self, name):
             if self.parser.innerHTML:
                 self.parser.parseError("unexpected-end-tag-after-body-innerhtml")
             else:
@@ -2515,7 +2506,7 @@ def endTagHtml(self,name):
 
         def endTagOther(self, token):
             self.parser.parseError("unexpected-end-tag-after-body",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.parser.phase = self.parser.phases["inBody"]
             return token
 
@@ -2558,7 +2549,7 @@ def startTagNoframes(self, token):
 
         def startTagOther(self, token):
             self.parser.parseError("unexpected-start-tag-in-frameset",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
 
         def endTagFrameset(self, token):
             if self.tree.openElements[-1].name == "html":
@@ -2567,15 +2558,14 @@ def endTagFrameset(self, token):
             else:
                 self.tree.openElements.pop()
             if (not self.parser.innerHTML and
-                self.tree.openElements[-1].name != "frameset"):
+                    self.tree.openElements[-1].name != "frameset"):
                 # If we're not in innerHTML mode and the the current node is not a
                 # "frameset" element (anymore) then switch.
                 self.parser.phase = self.parser.phases["afterFrameset"]
 
         def endTagOther(self, token):
             self.parser.parseError("unexpected-end-tag-in-frameset",
-              {"name": token["name"]})
-
+                                   {"name": token["name"]})
 
     class AfterFramesetPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#after3
@@ -2594,7 +2584,7 @@ def __init__(self, parser, tree):
             self.endTagHandler.default = self.endTagOther
 
         def processEOF(self):
-            #Stop parsing
+            # Stop parsing
             pass
 
         def processCharacters(self, token):
@@ -2605,15 +2595,14 @@ def startTagNoframes(self, token):
 
         def startTagOther(self, token):
             self.parser.parseError("unexpected-start-tag-after-frameset",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
 
         def endTagHtml(self, token):
             self.parser.phase = self.parser.phases["afterAfterFrameset"]
 
         def endTagOther(self, token):
             self.parser.parseError("unexpected-end-tag-after-frameset",
-              {"name": token["name"]})
-
+                                   {"name": token["name"]})
 
     class AfterAfterBodyPhase(Phase):
         def __init__(self, parser, tree):
@@ -2643,13 +2632,13 @@ def startTagHtml(self, token):
 
         def startTagOther(self, token):
             self.parser.parseError("expected-eof-but-got-start-tag",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.parser.phase = self.parser.phases["inBody"]
             return token
 
         def processEndTag(self, token):
             self.parser.parseError("expected-eof-but-got-end-tag",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.parser.phase = self.parser.phases["inBody"]
             return token
 
@@ -2683,12 +2672,11 @@ def startTagNoFrames(self, token):
 
         def startTagOther(self, token):
             self.parser.parseError("expected-eof-but-got-start-tag",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
 
         def processEndTag(self, token):
             self.parser.parseError("expected-eof-but-got-end-tag",
-              {"name": token["name"]})
-
+                                   {"name": token["name"]})
 
     return {
         "initial": InitialPhase,
@@ -2715,14 +2703,16 @@ def processEndTag(self, token):
         "afterAfterBody": AfterAfterBodyPhase,
         "afterAfterFrameset": AfterAfterFramesetPhase,
         # XXX after after frameset
-        }
+    }
+
 
-def impliedTagToken(name, type="EndTag", attributes = None,
-                    selfClosing = False):
+def impliedTagToken(name, type="EndTag", attributes=None,
+                    selfClosing=False):
     if attributes is None:
         attributes = {}
-    return {"type":tokenTypes[type], "name":name, "data":attributes,
-            "selfClosing":selfClosing}
+    return {"type": tokenTypes[type], "name": name, "data": attributes,
+            "selfClosing": selfClosing}
+
 
 class ParseError(Exception):
     """Error in parsed document"""
diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
index f6f30fcf..c9ad69c5 100644
--- a/html5lib/ihatexml.py
+++ b/html5lib/ihatexml.py
@@ -5,26 +5,101 @@
 
 from .constants import DataLossWarning
 
-baseChar = """[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | 
[#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
+baseChar = """
+[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
+[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] |
+[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] |
+[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 |
+[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] |
+[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] |
+[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] |
+[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] |
+[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 |
+[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] |
+[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] |
+[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D |
+[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] |
+[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] |
+[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] |
+[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] |
+[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] |
+[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] |
+[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 |
+[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] |
+[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] |
+[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] |
+[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] |
+[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] |
+[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] |
+[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] |
+[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] |
+[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] |
+[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] |
+[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A |
+#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 |
+#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] |
+#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] |
+[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] |
+[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C |
+#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 |
+[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] |
+[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] |
+[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 |
+[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] |
+[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B |
+#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE |
+[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] |
+[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 |
+[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] |
+[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
 
 ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
 
-combiningCharacter = """[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309A"""
-
-digit = """[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
-
-extender = """#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
+combiningCharacter = """
+[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] |
+[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 |
+[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] |
+[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] |
+#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] |
+[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] |
+[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 |
+#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] |
+[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC |
+[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] |
+#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] |
+[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] |
+[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] |
+[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] |
+[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] |
+[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] |
+#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 |
+[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] |
+#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] |
+[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] |
+[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] |
+#x3099 | #x309A"""
+
+digit = """
+[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
+[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
+[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
+[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
+
+extender = """
+#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
+[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
 
 letter = " | ".join([baseChar, ideographic])
 
-#Without the
+# Without the
 name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
-                       extender])
+                   extender])
 nameFirst = " | ".join([letter, "_"])
 
 reChar = re.compile(r"#x([\d|A-F]{4,4})")
 reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
 
+
 def charStringToList(chars):
     charRanges = [item.strip() for item in chars.split(" | ")]
     rv = []
@@ -35,7 +110,7 @@ def charStringToList(chars):
             if match is not None:
                 rv.append([hexToInt(item) for item in match.groups()])
                 if len(rv[-1]) == 1:
-                    rv[-1] = rv[-1]*2
+                    rv[-1] = rv[-1] * 2
                 foundMatch = True
                 break
         if not foundMatch:
@@ -45,6 +120,7 @@ def charStringToList(chars):
     rv = normaliseCharList(rv)
     return rv
 
+
 def normaliseCharList(charList):
     charList = sorted(charList)
     for item in charList:
@@ -54,41 +130,45 @@ def normaliseCharList(charList):
     while i < len(charList):
         j = 1
         rv.append(charList[i])
-        while i + j < len(charList) and charList[i+j][0] <= rv[-1][1] + 1:
-            rv[-1][1] = charList[i+j][1]
+        while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1:
+            rv[-1][1] = charList[i + j][1]
             j += 1
         i += j
     return rv
 
-#We don't really support characters above the BMP :(
+# We don't really support characters above the BMP :(
 max_unicode = int("FFFF", 16)
 
+
 def missingRanges(charList):
     rv = []
     if charList[0] != 0:
         rv.append([0, charList[0][0] - 1])
     for i, item in enumerate(charList[:-1]):
-        rv.append([item[1]+1, charList[i+1][0] - 1])
+        rv.append([item[1] + 1, charList[i + 1][0] - 1])
     if charList[-1][1] != max_unicode:
         rv.append([charList[-1][1] + 1, max_unicode])
     return rv
 
+
 def listToRegexpStr(charList):
     rv = []
     for item in charList:
         if item[0] == item[1]:
-           rv.append(escapeRegexp(chr(item[0])))
+            rv.append(escapeRegexp(chr(item[0])))
         else:
             rv.append(escapeRegexp(chr(item[0])) + "-" +
                       escapeRegexp(chr(item[1])))
-    return "[%s]"%"".join(rv)
+    return "[%s]" % "".join(rv)
+
 
 def hexToInt(hex_str):
     return int(hex_str, 16)
 
+
 def escapeRegexp(string):
     specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
-                          "[", "]", "|", "(", ")", "-")
+                         "[", "]", "|", "(", ")", "-")
     for char in specialCharacters:
         string = string.replace(char, "\\" + char)
         if char in string:
@@ -96,19 +176,21 @@ def escapeRegexp(string):
 
     return string
 
-#output from the above
+# output from the above
 nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u
0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
 
 nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u
3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
 
+
 class InfosetFilter(object):
     replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
-    def __init__(self, replaceChars = None,
-                 dropXmlnsLocalName = False,
-                 dropXmlnsAttrNs = False,
-                 preventDoubleDashComments = False,
-                 preventDashAtCommentEnd = False,
-                 replaceFormFeedCharacters = True):
+
+    def __init__(self, replaceChars=None,
+                 dropXmlnsLocalName=False,
+                 dropXmlnsAttrNs=False,
+                 preventDoubleDashComments=False,
+                 preventDashAtCommentEnd=False,
+                 replaceFormFeedCharacters=True):
 
         self.dropXmlnsLocalName = dropXmlnsLocalName
         self.dropXmlnsAttrNs = dropXmlnsAttrNs
@@ -146,7 +228,7 @@ def coerceCharacters(self, data):
             for i in range(data.count("\x0C")):
                 warnings.warn("Text cannot contain U+000C", DataLossWarning)
             data = data.replace("\x0C", " ")
-        #Other non-xml characters
+        # Other non-xml characters
         return data
 
     def toXmlName(self, name):
diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py
index ca2514e6..159901be 100644
--- a/html5lib/inputstream.py
+++ b/html5lib/inputstream.py
@@ -3,7 +3,6 @@
 
 import codecs
 import re
-import types
 import sys
 
 from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
@@ -23,7 +22,7 @@
     class BufferedIOBase(object):
         pass
 
-#Non-unicode versions of constants for use in the pre-parser
+# Non-unicode versions of constants for use in the pre-parser
 spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
 asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
 asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
@@ -44,6 +43,7 @@ class BufferedIOBase(object):
 # Cache for charsUntil()
 charsUntilRegEx = {}
 
+
 class BufferedStream:
     """Buffering for streams that do not have buffering of their own
 
@@ -54,7 +54,7 @@ class BufferedStream:
     def __init__(self, stream):
         self.stream = stream
         self.buffer = []
-        self.position = [-1,0] #chunk number, offset
+        self.position = [-1, 0]  # chunk number, offset
 
     def tell(self):
         pos = 0
@@ -107,8 +107,7 @@ def _readFromBuffer(self, bytes):
                 bytesToRead = len(bufferedData) - bufferOffset
                 self.position = [bufferIndex, len(bufferedData)]
                 bufferIndex += 1
-            data = rv.append(bufferedData[bufferOffset:
-                                          bufferOffset + bytesToRead])
+            rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
             remainingBytes -= bytesToRead
 
             bufferOffset = 0
@@ -161,7 +160,7 @@ def __init__(self, source):
 
         """
 
-        #Craziness
+        # Craziness
         if len("\U0010FFFF") == 1:
             self.reportCharacterErrors = self.characterErrorsUCS4
             self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]")
@@ -188,7 +187,7 @@ def reset(self):
         # number of columns in the last line of the previous chunk
         self.prevNumCols = 0
 
-        #Deal with CR LF and surrogates split over chunk boundaries
+        # Deal with CR LF and surrogates split over chunk boundaries
         self._bufferedCharacter = None
 
     def openStream(self, source):
@@ -203,10 +202,10 @@ def openStream(self, source):
         else:
             stream = StringIO(source)
 
-        if (#not isinstance(stream, BufferedIOBase) and
+        if (  # not isinstance(stream, BufferedIOBase) and
             not(hasattr(stream, "tell") and
                 hasattr(stream, "seek")) or
-            stream is sys.stdin):
+                stream is sys.stdin):
             stream = BufferedStream(stream)
 
         return stream
@@ -225,7 +224,7 @@ def _position(self, offset):
     def position(self):
         """Returns (line, col) of the current position in the stream."""
         line, col = self._position(self.chunkOffset)
-        return (line+1, col)
+        return (line + 1, col)
 
     def char(self):
         """ Read one character from the stream or queue if available. Return
@@ -254,7 +253,7 @@ def readChunk(self, chunkSize=None):
 
         data = self.dataStream.read(chunkSize)
 
-        #Deal with CR LF and surrogates broken across chunks
+        # Deal with CR LF and surrogates broken across chunks
         if self._bufferedCharacter:
             data = self._bufferedCharacter + data
             self._bufferedCharacter = None
@@ -287,19 +286,18 @@ def characterErrorsUCS4(self, data):
             self.errors.append("invalid-codepoint")
 
     def characterErrorsUCS2(self, data):
-        #Someone picked the wrong compile option
-        #You lose
+        # Someone picked the wrong compile option
+        # You lose
         skip = False
-        import sys
         for match in invalid_unicode_re.finditer(data):
             if skip:
                 continue
             codepoint = ord(match.group())
             pos = match.start()
-            #Pretty sure there should be endianness issues here
-            if utils.isSurrogatePair(data[pos:pos+2]):
-                #We have a surrogate pair!
-                char_val = utils.surrogatePairToCodepoint(data[pos:pos+2])
+            # Pretty sure there should be endianness issues here
+            if utils.isSurrogatePair(data[pos:pos + 2]):
+                # We have a surrogate pair!
+                char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
                 if char_val in non_bmp_invalid_codepoints:
                     self.errors.append("invalid-codepoint")
                 skip = True
@@ -310,7 +308,7 @@ def characterErrorsUCS2(self, data):
                 skip = False
                 self.errors.append("invalid-codepoint")
 
-    def charsUntil(self, characters, opposite = False):
+    def charsUntil(self, characters, opposite=False):
         """ Returns a string of characters from the stream up to but not
         including any character in 'characters' or EOF. 'characters' must be
         a container that supports the 'in' method and iteration over its
@@ -373,6 +371,7 @@ def unget(self, char):
                 self.chunkOffset -= 1
                 assert self.chunk[self.chunkOffset] == char
 
+
 class HTMLBinaryInputStream(HTMLUnicodeInputStream):
     """Provides a unicode stream of characters to the HTMLTokenizer.
 
@@ -406,19 +405,19 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
         self.charEncoding = (codecName(encoding), "certain")
 
         # Encoding Information
-        #Number of bytes to use when looking for a meta element with
-        #encoding information
+        # Number of bytes to use when looking for a meta element with
+        # encoding information
         self.numBytesMeta = 512
-        #Number of bytes to use when using detecting encoding using chardet
+        # Number of bytes to use when detecting encoding using chardet
         self.numBytesChardet = 100
-        #Encoding to use if no other information can be found
+        # Encoding to use if no other information can be found
         self.defaultEncoding = "windows-1252"
 
-        #Detect encoding iff no explicit "transport level" encoding is supplied
+        # Detect encoding iff no explicit "transport level" encoding is supplied
         if (self.charEncoding[0] is None):
             self.charEncoding = self.detectEncoding(parseMeta, chardet)
 
-        #Call superclass
+        # Call superclass
         self.reset()
 
     def reset(self):
@@ -439,22 +438,22 @@ def openStream(self, source):
             stream = BytesIO(source)
 
         if (not(hasattr(stream, "tell") and hasattr(stream, "seek")) or
-            stream is sys.stdin):
+                stream is sys.stdin):
             stream = BufferedStream(stream)
 
         return stream
 
     def detectEncoding(self, parseMeta=True, chardet=True):
-        #First look for a BOM
-        #This will also read past the BOM if present
+        # First look for a BOM
+        # This will also read past the BOM if present
         encoding = self.detectBOM()
         confidence = "certain"
-        #If there is no BOM need to look for meta elements with encoding
-        #information
+        # If there is no BOM need to look for meta elements with encoding
+        # information
         if encoding is None and parseMeta:
             encoding = self.detectEncodingMeta()
             confidence = "tentative"
-        #Guess with chardet, if avaliable
+        # Guess with chardet, if available
         if encoding is None and chardet:
             confidence = "tentative"
             try:
@@ -475,11 +474,11 @@ def detectEncoding(self, parseMeta=True, chardet=True):
                 pass
         # If all else fails use the default encoding
         if encoding is None:
-            confidence="tentative"
+            confidence = "tentative"
             encoding = self.defaultEncoding
 
-        #Substitute for equivalent encodings:
-        encodingSub = {"iso-8859-1":"windows-1252"}
+        # Substitute for equivalent encodings:
+        encodingSub = {"iso-8859-1": "windows-1252"}
 
         if encoding.lower() in encodingSub:
             encoding = encodingSub[encoding.lower()]
@@ -499,7 +498,7 @@ def changeEncoding(self, newEncoding):
             self.rawStream.seek(0)
             self.reset()
             self.charEncoding = (newEncoding, "certain")
-            raise ReparseException("Encoding changed from %s to %s"%(self.charEncoding[0], newEncoding))
+            raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
 
     def detectBOM(self):
         """Attempts to detect at BOM at the start of the stream. If
@@ -523,7 +522,7 @@ def detectBOM(self):
             encoding = bomDict.get(string)         # UTF-32
             seek = 4
             if not encoding:
-                encoding = bomDict.get(string[:2]) # UTF-16
+                encoding = bomDict.get(string[:2])  # UTF-16
                 seek = 2
 
         # Set the read position past the BOM if one was found, otherwise
@@ -546,6 +545,7 @@ def detectEncodingMeta(self):
 
         return encoding
 
+
 class EncodingBytes(bytes):
     """String-like object with an associated position and various extra methods
     If the position is ever greater than the string length then an exception is
@@ -555,7 +555,7 @@ def __new__(self, value):
         return bytes.__new__(self, value.lower())
 
     def __init__(self, value):
-        self._position=-1
+        self._position = -1
 
     def __iter__(self):
         return self
@@ -566,7 +566,7 @@ def __next__(self):
             raise StopIteration
         elif p < 0:
             raise TypeError
-        return self[p:p+1]
+        return self[p:p + 1]
 
     def next(self):
         # Py2 compat
@@ -579,7 +579,7 @@ def previous(self):
         elif p < 0:
             raise TypeError
         self._position = p = p - 1
-        return self[p:p+1]
+        return self[p:p + 1]
 
     def setPosition(self, position):
         if self._position >= len(self):
@@ -597,7 +597,7 @@ def getPosition(self):
     position = property(getPosition, setPosition)
 
     def getCurrentByte(self):
-        return self[self.position:self.position+1]
+        return self[self.position:self.position + 1]
 
     currentByte = property(getCurrentByte)
 
@@ -605,7 +605,7 @@ def skip(self, chars=spaceCharactersBytes):
         """Skip past a list of characters"""
         p = self.position               # use property for the error-checking
         while p < len(self):
-            c = self[p:p+1]
+            c = self[p:p + 1]
             if c not in chars:
                 self._position = p
                 return c
@@ -616,7 +616,7 @@ def skip(self, chars=spaceCharactersBytes):
     def skipUntil(self, chars):
         p = self.position
         while p < len(self):
-            c = self[p:p+1]
+            c = self[p:p + 1]
             if c in chars:
                 self._position = p
                 return c
@@ -629,7 +629,7 @@ def matchBytes(self, bytes):
         are found return True and advance the position to the byte after the
         match. Otherwise return False and leave the position alone"""
         p = self.position
-        data = self[p:p+len(bytes)]
+        data = self[p:p + len(bytes)]
         rv = data.startswith(bytes)
         if rv:
             self.position += len(bytes)
@@ -643,7 +643,7 @@ def jumpTo(self, bytes):
             # XXX: This is ugly, but I can't see a nicer way to fix this.
             if self._position == -1:
                 self._position = 0
-            self._position += (newPosition + len(bytes)-1)
+            self._position += (newPosition + len(bytes) - 1)
             return True
         else:
             raise StopIteration
@@ -659,12 +659,12 @@ def __init__(self, data):
 
     def getEncoding(self):
         methodDispatch = (
-            (b"<!--",self.handleComment),
-            (b"<meta",self.handleMeta),
-            (b"</",self.handlePossibleEndTag),
-            (b"<!",self.handleOther),
-            (b"<?",self.handleOther),
-            (b"<",self.handlePossibleStartTag))
+            (b"<!--", self.handleComment),
+            (b"<meta", self.handleMeta),
+            (b"</", self.handlePossibleEndTag),
+            (b"<!", self.handleOther),
+            (b"<?", self.handleOther),
+            (b"<", self.handlePossibleStartTag))
         for byte in self.data:
             keepParsing = True
             for key, method in methodDispatch:
@@ -673,7 +673,7 @@ def getEncoding(self):
                         keepParsing = method()
                         break
                     except StopIteration:
-                        keepParsing=False
+                        keepParsing = False
                         break
             if not keepParsing:
                 break
@@ -686,13 +686,13 @@ def handleComment(self):
 
     def handleMeta(self):
         if self.data.currentByte not in spaceCharactersBytes:
-            #if we have <meta not followed by a space so just keep going
+            # If we have <meta not followed by a space, just keep going
             return True
-        #We have a valid meta element we want to search for attributes
+        # We have a valid meta element we want to search for attributes
         hasPragma = False
         pendingEncoding = None
         while True:
-            #Try to find the next attribute after the current position
+            # Try to find the next attribute after the current position
             attr = self.getAttribute()
             if attr is None:
                 return True
@@ -730,9 +730,9 @@ def handlePossibleEndTag(self):
     def handlePossibleTag(self, endTag):
         data = self.data
         if data.currentByte not in asciiLettersBytes:
-            #If the next byte is not an ascii letter either ignore this
-            #fragment (possible start tag case) or treat it according to
-            #handleOther
+            # If the next byte is not an ascii letter either ignore this
+            # fragment (possible start tag case) or treat it according to
+            # handleOther
             if endTag:
                 data.previous()
                 self.handleOther()
@@ -740,11 +740,11 @@ def handlePossibleTag(self, endTag):
 
         c = data.skipUntil(spacesAngleBrackets)
         if c == b"<":
-            #return to the first step in the overall "two step" algorithm
-            #reprocessing the < byte
+            # return to the first step in the overall "two step" algorithm
+            # reprocessing the < byte
             data.previous()
         else:
-            #Read all attributes
+            # Read all attributes
             attr = self.getAttribute()
             while attr is not None:
                 attr = self.getAttribute()
@@ -766,47 +766,47 @@ def getAttribute(self):
         # Step 3
         attrName = []
         attrValue = []
-        #Step 4 attribute name
+        # Step 4 attribute name
         while True:
             if c == b"=" and attrName:
                 break
             elif c in spaceCharactersBytes:
-                #Step 6!
+                # Step 6!
                 c = data.skip()
                 break
             elif c in (b"/", b">"):
                 return b"".join(attrName), b""
             elif c in asciiUppercaseBytes:
                 attrName.append(c.lower())
-            elif c == None:
+            elif c is None:
                 return None
             else:
                 attrName.append(c)
-            #Step 5
+            # Step 5
             c = next(data)
-        #Step 7
+        # Step 7
         if c != b"=":
             data.previous()
             return b"".join(attrName), b""
-        #Step 8
+        # Step 8
         next(data)
-        #Step 9
+        # Step 9
         c = data.skip()
-        #Step 10
+        # Step 10
         if c in (b"'", b'"'):
-            #10.1
+            # 10.1
             quoteChar = c
             while True:
-                #10.2
+                # 10.2
                 c = next(data)
-                #10.3
+                # 10.3
                 if c == quoteChar:
                     next(data)
                     return b"".join(attrName), b"".join(attrValue)
-                #10.4
+                # 10.4
                 elif c in asciiUppercaseBytes:
                     attrValue.append(c.lower())
-                #10.5
+                # 10.5
                 else:
                     attrValue.append(c)
         elif c == b">":
@@ -834,19 +834,20 @@ class ContentAttrParser(object):
     def __init__(self, data):
         assert isinstance(data, bytes)
         self.data = data
+
     def parse(self):
         try:
-            #Check if the attr name is charset
-            #otherwise return
+            # Check if the attr name is charset
+            # otherwise return
             self.data.jumpTo(b"charset")
             self.data.position += 1
             self.data.skip()
             if not self.data.currentByte == b"=":
-                #If there is no = sign keep looking for attrs
+                # If there is no = sign keep looking for attrs
                 return None
             self.data.position += 1
             self.data.skip()
-            #Look for an encoding between matching quote marks
+            # Look for an encoding between matching quote marks
             if self.data.currentByte in (b'"', b"'"):
                 quoteMark = self.data.currentByte
                 self.data.position += 1
@@ -856,13 +857,13 @@ def parse(self):
                 else:
                     return None
             else:
-                #Unquoted value
+                # Unquoted value
                 oldPosition = self.data.position
                 try:
                     self.data.skipUntil(spaceCharactersBytes)
                     return self.data[oldPosition:self.data.position]
                 except StopIteration:
-                    #Return the whole remaining value
+                    # Return the whole remaining value
                     return self.data[oldPosition:]
         except StopIteration:
             return None
diff --git a/html5lib/sanitizer.py b/html5lib/sanitizer.py
index 9c7d342d..2f63a471 100644
--- a/html5lib/sanitizer.py
+++ b/html5lib/sanitizer.py
@@ -6,138 +6,139 @@
 from .tokenizer import HTMLTokenizer
 from .constants import tokenTypes
 
+
 class HTMLSanitizerMixin(object):
     """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
 
     acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
-        'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
-        'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
-        'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
-        'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
-        'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
-        'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
-        'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
-        'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
-        'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
-        'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
-        'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
-        'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
+                           'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
+                           'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
+                           'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
+                           'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
+                           'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
+                           'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
+                           'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
+                           'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
+                           'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
+                           'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
+                           'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
+                           'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
 
     mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
-        'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
-        'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
-        'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
-        'munderover', 'none']
+                       'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
+                       'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
+                       'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
+                       'munderover', 'none']
 
     svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
-        'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
-        'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
-        'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
-        'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
-        'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
+                    'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
+                    'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
+                    'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
+                    'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
+                    'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
 
     acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
-        'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
-        'background', 'balance', 'bgcolor', 'bgproperties', 'border',
-        'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
-        'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
-        'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
-        'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
-        'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
-        'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
-        'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
-        'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
-        'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
-        'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
-        'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
-        'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
-        'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
-        'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
-        'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
-        'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
-        'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
-        'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
-        'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
-        'width', 'wrap', 'xml:lang']
+                             'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
+                             'background', 'balance', 'bgcolor', 'bgproperties', 'border',
+                             'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
+                             'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
+                             'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
+                             'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
+                             'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
+                             'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
+                             'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
+                             'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
+                             'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
+                             'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
+                             'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
+                             'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
+                             'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
+                             'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
+                             'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
+                             'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
+                             'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
+                             'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
+                             'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
+                             'width', 'wrap', 'xml:lang']
 
     mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
-        'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
-        'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
-        'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
-        'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
-        'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
-        'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
-        'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
-        'xlink:type', 'xmlns', 'xmlns:xlink']
+                         'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
+                         'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
+                         'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
+                         'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
+                         'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
+                         'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
+                         'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
+                         'xlink:type', 'xmlns', 'xmlns:xlink']
 
     svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
-        'arabic-form', 'ascent', 'attributeName', 'attributeType',
-        'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
-        'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
-        'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
-        'fill-opacity', 'fill-rule', 'font-family', 'font-size',
-        'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
-        'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
-        'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
-        'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
-        'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
-        'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
-        'opacity', 'orient', 'origin', 'overline-position',
-        'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
-        'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
-        'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
-        'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
-        'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
-        'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
-        'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
-        'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
-        'transform', 'type', 'u1', 'u2', 'underline-position',
-        'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
-        'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
-        'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
-        'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
-        'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
-        'y1', 'y2', 'zoomAndPan']
+                      'arabic-form', 'ascent', 'attributeName', 'attributeType',
+                      'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
+                      'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
+                      'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
+                      'fill-opacity', 'fill-rule', 'font-family', 'font-size',
+                      'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
+                      'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
+                      'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
+                      'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
+                      'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
+                      'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
+                      'opacity', 'orient', 'origin', 'overline-position',
+                      'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
+                      'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
+                      'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
+                      'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
+                      'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
+                      'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
+                      'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
+                      'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
+                      'transform', 'type', 'u1', 'u2', 'underline-position',
+                      'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
+                      'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
+                      'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
+                      'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
+                      'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
+                      'y1', 'y2', 'zoomAndPan']
 
     attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
-        'xlink:href', 'xml:base']
+                       'xlink:href', 'xml:base']
 
     svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
-        'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
-        'mask', 'stroke']
+                               'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
+                               'mask', 'stroke']
 
     svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
-        'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
-        'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
-        'set', 'use']
+                            'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
+                            'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
+                            'set', 'use']
 
     acceptable_css_properties = ['azimuth', 'background-color',
-        'border-bottom-color', 'border-collapse', 'border-color',
-        'border-left-color', 'border-right-color', 'border-top-color', 'clear',
-        'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
-        'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
-        'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
-        'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
-        'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
-        'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
-        'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
-        'white-space', 'width']
+                                 'border-bottom-color', 'border-collapse', 'border-color',
+                                 'border-left-color', 'border-right-color', 'border-top-color', 'clear',
+                                 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
+                                 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
+                                 'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
+                                 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
+                                 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
+                                 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
+                                 'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
+                                 'white-space', 'width']
 
     acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
-        'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
-        'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
-        'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
-        'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
-        'transparent', 'underline', 'white', 'yellow']
+                               'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
+                               'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
+                               'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
+                               'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
+                               'transparent', 'underline', 'white', 'yellow']
 
-    acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule',
-        'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
-        'stroke-opacity']
+    acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
+                                 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
+                                 'stroke-opacity']
 
-    acceptable_protocols = [ 'ed2k', 'ftp', 'http', 'https', 'irc',
-        'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
-        'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
-        'ssh', 'sftp', 'rtsp', 'afs' ]
+    acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
+                            'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
+                            'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
+                            'ssh', 'sftp', 'rtsp', 'afs']
 
     # subclasses may define their own versions of these constants
     allowed_elements = acceptable_elements + mathml_elements + svg_elements
@@ -163,13 +164,13 @@ def sanitize_token(self, token):
         # accommodate filters which use token_type differently
         token_type = token["type"]
         if token_type in list(tokenTypes.keys()):
-          token_type = tokenTypes[token_type]
+            token_type = tokenTypes[token_type]
 
         if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
-                             tokenTypes["EmptyTag"]):
+                          tokenTypes["EmptyTag"]):
             if token["name"] in self.allowed_elements:
                 if "data" in token:
-                    attrs = dict([(name,val) for name,val in
+                    attrs = dict([(name, val) for name, val in
                                   token["data"][::-1]
                                   if name in self.allowed_attributes])
                     for attr in self.attr_val_is_uri:
@@ -177,9 +178,9 @@ def sanitize_token(self, token):
                             continue
                         val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                                unescape(attrs[attr])).lower()
-                        #remove replacement characters from unescaped characters
+                        # remove replacement characters from unescaped characters
                         val_unescaped = val_unescaped.replace("\ufffd", "")
-                        if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and
+                        if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
                             (val_unescaped.split(':')[0] not in
                              self.allowed_protocols)):
                             del attrs[attr]
@@ -194,18 +195,18 @@ def sanitize_token(self, token):
                         del attrs['xlink:href']
                     if 'style' in attrs:
                         attrs['style'] = self.sanitize_css(attrs['style'])
-                    token["data"] = [[name,val] for name,val in list(attrs.items())]
+                    token["data"] = [[name, val] for name, val in list(attrs.items())]
                 return token
             else:
                 if token_type == tokenTypes["EndTag"]:
                     token["data"] = "</%s>" % token["name"]
                 elif token["data"]:
-                    attrs = ''.join([' %s="%s"' % (k,escape(v)) for k,v in token["data"]])
-                    token["data"] = "<%s%s>" % (token["name"],attrs)
+                    attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]])
+                    token["data"] = "<%s%s>" % (token["name"], attrs)
                 else:
                     token["data"] = "<%s>" % token["name"]
                 if token.get("selfClosing"):
-                    token["data"]=token["data"][:-1] + "/>"
+                    token["data"] = token["data"][:-1] + "/>"
 
                 if token["type"] in list(tokenTypes.keys()):
                     token["type"] = "Characters"
@@ -221,35 +222,39 @@ def sanitize_token(self, token):
 
     def sanitize_css(self, style):
         # disallow urls
-        style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style)
+        style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
 
         # gauntlet
-        if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return ''
-        if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): return ''
+        if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
+            return ''
+        if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
+            return ''
 
         clean = []
-        for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style):
-          if not value: continue
-          if prop.lower() in self.allowed_css_properties:
-              clean.append(prop + ': ' + value + ';')
-          elif prop.split('-')[0].lower() in ['background','border','margin',
-                                              'padding']:
-              for keyword in value.split():
-                  if not keyword in self.acceptable_css_keywords and \
-                      not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$",keyword):
-                      break
-              else:
-                  clean.append(prop + ': ' + value + ';')
-          elif prop.lower() in self.allowed_svg_properties:
-              clean.append(prop + ': ' + value + ';')
+        for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
+            if not value:
+                continue
+            if prop.lower() in self.allowed_css_properties:
+                clean.append(prop + ': ' + value + ';')
+            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
+                                                'padding']:
+                for keyword in value.split():
+                    if not keyword in self.acceptable_css_keywords and \
+                            not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
+                        break
+                else:
+                    clean.append(prop + ': ' + value + ';')
+            elif prop.lower() in self.allowed_svg_properties:
+                clean.append(prop + ': ' + value + ';')
 
         return ' '.join(clean)
 
+
 class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
     def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
                  lowercaseElementName=False, lowercaseAttrName=False, parser=None):
-        #Change case matching defaults as we only output lowercase html anyway
-        #This solution doesn't seem ideal...
+        # Change case matching defaults as we only output lowercase html anyway
+        # This solution doesn't seem ideal...
         HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
                                lowercaseElementName, lowercaseAttrName, parser=parser)
 
diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer/htmlserializer.py
index 550b4db2..c5ec773b 100644
--- a/html5lib/serializer/htmlserializer.py
+++ b/html5lib/serializer/htmlserializer.py
@@ -6,7 +6,7 @@
 
 try:
     from functools import reduce
-    pass # no-op statement to avoid 3to2 introducing parse error
+    pass  # no-op statement to avoid 3to2 introducing parse error
 except ImportError:
     pass
 
@@ -24,14 +24,12 @@
 else:
     unicode_encode_errors = "htmlentityreplace"
 
-    from html5lib.constants import entities
-
     encode_entity_map = {}
     is_ucs4 = len("\U0010FFFF") == 1
     for k, v in list(entities.items()):
-        #skip multi-character entities
+        # skip multi-character entities
         if ((is_ucs4 and len(v) > 1) or
-            (not is_ucs4 and len(v) > 2)):
+                (not is_ucs4 and len(v) > 2)):
             continue
         if v != "&":
             if len(v) == 2:
@@ -56,8 +54,8 @@ def htmlentityreplace_errors(exc):
                     skip = False
                     continue
                 index = i + exc.start
-                if utils.isSurrogatePair(exc.object[index:min([exc.end, index+2])]):
-                    codepoint = utils.surrogatePairToCodepoint(exc.object[index:index+2])
+                if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
+                    codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
                     skip = True
                 else:
                     codepoint = ord(c)
@@ -70,7 +68,7 @@ def htmlentityreplace_errors(exc):
                     if not e.endswith(";"):
                         res.append(";")
                 else:
-                    res.append("&#x%s;"%(hex(cp)[2:]))
+                    res.append("&#x%s;" % (hex(cp)[2:]))
             return ("".join(res), exc.end)
         else:
             return xmlcharrefreplace_errors(exc)
@@ -104,10 +102,10 @@ class HTMLSerializer(object):
     sanitize = False
 
     options = ("quote_attr_values", "quote_char", "use_best_quote_char",
-          "minimize_boolean_attributes", "use_trailing_solidus",
-          "space_before_trailing_solidus", "omit_optional_tags",
-          "strip_whitespace", "inject_meta_charset", "escape_lt_in_attrs",
-          "escape_rcdata", "resolve_entities", "sanitize")
+               "minimize_boolean_attributes", "use_trailing_solidus",
+               "space_before_trailing_solidus", "omit_optional_tags",
+               "strip_whitespace", "inject_meta_charset", "escape_lt_in_attrs",
+               "escape_rcdata", "resolve_entities", "sanitize")
 
     def __init__(self, **kwargs):
         """Initialize HTMLSerializer.
@@ -228,25 +226,25 @@ def serialize(self, treewalker, encoding=None):
                     in_cdata = True
                 elif in_cdata:
                     self.serializeError(_("Unexpected child element of a CDATA element"))
-                attributes = []
-                for (attr_namespace,attr_name),attr_value in sorted(token["data"].items()):
-                    #TODO: Add namespace support here
+                for (attr_namespace, attr_name), attr_value in sorted(token["data"].items()):
+                    # TODO: Add namespace support here
                     k = attr_name
                     v = attr_value
                     yield self.encodeStrict(' ')
 
                     yield self.encodeStrict(k)
                     if not self.minimize_boolean_attributes or \
-                      (k not in booleanAttributes.get(name, tuple()) \
-                      and k not in booleanAttributes.get("", tuple())):
+                        (k not in booleanAttributes.get(name, tuple())
+                         and k not in booleanAttributes.get("", tuple())):
                         yield self.encodeStrict("=")
                         if self.quote_attr_values or not v:
                             quote_attr = True
                         else:
-                            quote_attr = reduce(lambda x,y: x or (y in v),
-                                spaceCharacters + ">\"'=", False)
+                            quote_attr = reduce(lambda x, y: x or (y in v),
+                                                spaceCharacters + ">\"'=", False)
                         v = v.replace("&", "&amp;")
-                        if self.escape_lt_in_attrs: v = v.replace("<", "&lt;")
+                        if self.escape_lt_in_attrs:
+                            v = v.replace("<", "&lt;")
                         if quote_attr:
                             quote_char = self.quote_char
                             if self.use_best_quote_char:
@@ -310,6 +308,7 @@ def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
         if self.strict:
             raise SerializeError
 
+
 def SerializeError(Exception):
     """Error in serialized tree"""
     pass
diff --git a/html5lib/tests/__init__.py b/html5lib/tests/__init__.py
index 903df92a..b8ce2de3 100644
--- a/html5lib/tests/__init__.py
+++ b/html5lib/tests/__init__.py
@@ -1,12 +1 @@
 from __future__ import absolute_import, division, unicode_literals
-
-import sys
-import os
-
-parent_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], ".."))
-
-if not parent_path in sys.path:
-    sys.path.insert(0, parent_path)
-del parent_path
-
-from . import support
diff --git a/html5lib/tests/mockParser.py b/html5lib/tests/mockParser.py
index 83c62bb8..1b17e423 100644
--- a/html5lib/tests/mockParser.py
+++ b/html5lib/tests/mockParser.py
@@ -4,12 +4,13 @@
 import os
 
 if __name__ == '__main__':
-    #Allow us to import from the src directory
+    # Allow us to import from the src directory
     os.chdir(os.path.split(os.path.abspath(__file__))[0])
     sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
 
 from tokenizer import HTMLTokenizer
 
+
 class HTMLParser(object):
     """ Fake parser to test tokenizer output """
     def parse(self, stream, output=True):
@@ -22,7 +23,8 @@ def parse(self, stream, output=True):
     x = HTMLParser()
     if len(sys.argv) > 1:
         if len(sys.argv) > 2:
-            import hotshot, hotshot.stats
+            import hotshot
+            import hotshot.stats
             prof = hotshot.Profile('stats.prof')
             prof.runcall(x.parse, sys.argv[1], False)
             prof.close()
diff --git a/html5lib/tests/performance/concatenation.py b/html5lib/tests/performance/concatenation.py
index f9ff754f..a1465036 100755
--- a/html5lib/tests/performance/concatenation.py
+++ b/html5lib/tests/performance/concatenation.py
@@ -1,23 +1,27 @@
 from __future__ import absolute_import, division, unicode_literals
 
+
 def f1():
     x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     x += y + z
 
+
 def f2():
     x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     x = x + y + z
 
+
 def f3():
     x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     x = "".join((x, y, z))
 
+
 def f4():
     x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
index c9c3236b..8bf6d4e3 100644
--- a/html5lib/tests/support.py
+++ b/html5lib/tests/support.py
@@ -12,15 +12,14 @@
                                                 os.path.pardir,
                                                 os.path.pardir)))
 
-import html5lib
-from html5lib import html5parser, treebuilders
+from html5lib import treebuilders
 del base_path
 
-#Build a dict of avaliable trees
-treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
-             "DOM":treebuilders.getTreeBuilder("dom")}
+# Build a dict of available trees
+treeTypes = {"simpletree": treebuilders.getTreeBuilder("simpletree"),
+             "DOM": treebuilders.getTreeBuilder("dom")}
 
-#Try whatever etree implementations are avaliable from a list that are
+# Try whatever etree implementations are available from a list that are
 #"supposed" to work
 try:
     import xml.etree.ElementTree as ElementTree
@@ -43,13 +42,16 @@
         pass
 
 try:
-    import lxml.etree as lxml
-    treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
+    import lxml.etree as lxml  # flake8: noqa
 except ImportError:
     pass
+else:
+    treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
+
 
 def get_data_files(subdirectory, files='*.dat'):
-    return glob.glob(os.path.join(test_dir,subdirectory,files))
+    return glob.glob(os.path.join(test_dir, subdirectory, files))
+
 
 class DefaultDict(dict):
     def __init__(self, default, *args, **kwargs):
@@ -59,6 +61,7 @@ def __init__(self, default, *args, **kwargs):
     def __getitem__(self, key):
         return dict.get(self, key, self.default)
 
+
 class TestData(object):
     def __init__(self, filename, newTestHeading="data", encoding="utf8"):
         if encoding == None:
@@ -73,17 +76,17 @@ def __del__(self):
 
     def __iter__(self):
         data = DefaultDict(None)
-        key=None
+        key = None
         for line in self.f:
             heading = self.isSectionHeading(line)
             if heading:
                 if data and heading == self.newTestHeading:
-                    #Remove trailing newline
+                    # Remove trailing newline
                     data[key] = data[key][:-1]
                     yield self.normaliseOutput(data)
                     data = DefaultDict(None)
                 key = heading
-                data[key]="" if self.encoding else b""
+                data[key] = "" if self.encoding else b""
             elif key is not None:
                 data[key] += line
         if data:
@@ -92,19 +95,20 @@ def __iter__(self):
     def isSectionHeading(self, line):
         """If the current heading is a test section heading return the heading,
         otherwise return False"""
-        #print(line)
+        # print(line)
         if line.startswith("#" if self.encoding else b"#"):
             return line[1:].strip()
         else:
             return False
 
     def normaliseOutput(self, data):
-        #Remove trailing newlines
-        for key,value in data.items():
+        # Remove trailing newlines
+        for key, value in data.items():
             if value.endswith("\n" if self.encoding else b"\n"):
                 data[key] = value[:-1]
         return data
 
+
 def convert(stripChars):
     def convertData(data):
         """convert the output of str(document) to the format used in the testcases"""
@@ -120,6 +124,7 @@ def convertData(data):
 
 convertExpected = convert(2)
 
+
 def errorMessage(input, expected, actual):
     msg = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" %
            (repr(input), repr(expected), repr(actual)))
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
index 74730e60..df957eb1 100644
--- a/html5lib/tests/test_encoding.py
+++ b/html5lib/tests/test_encoding.py
@@ -1,6 +1,5 @@
 from __future__ import absolute_import, division, unicode_literals
 
-import re
 import os
 import unittest
 
@@ -12,6 +11,7 @@
 from .support import get_data_files, TestData, test_dir, errorMessage
 from html5lib import HTMLParser, inputstream
 
+
 class Html5EncodingTestCase(unittest.TestCase):
     def test_codec_name_a(self):
         self.assertEqual(inputstream.codecName("utf-8"), "utf-8")
@@ -25,13 +25,15 @@ def test_codec_name_c(self):
     def test_codec_name_d(self):
         self.assertEqual(inputstream.codecName("ISO_8859--1"), "windows-1252")
 
+
 def runParserEncodingTest(data, encoding):
     p = HTMLParser()
-    t = p.parse(data, useChardet=False)
+    p.parse(data, useChardet=False)
     encoding = encoding.lower().decode("ascii")
 
     assert encoding == p.tokenizer.stream.charEncoding[0], errorMessage(data, encoding, p.tokenizer.stream.charEncoding[0])
 
+
 def runPreScanEncodingTest(data, encoding):
     stream = inputstream.HTMLBinaryInputStream(data, chardet=False)
     encoding = encoding.lower().decode("ascii")
@@ -42,20 +44,20 @@ def runPreScanEncodingTest(data, encoding):
 
     assert encoding == stream.charEncoding[0], errorMessage(data, encoding, stream.charEncoding[0])
 
+
 def test_encoding():
     for filename in get_data_files("encoding"):
-        test_name = os.path.basename(filename).replace('.dat',''). \
-            replace('-','')
         tests = TestData(filename, b"data", encoding=None)
         for idx, test in enumerate(tests):
             yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
             yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
 
 try:
-    import chardet
+    import chardet  # flake8: noqa
+except ImportError:
+    print("chardet not found, skipping chardet tests")
+else:
     def test_chardet():
-        data = open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt"), "rb").read()
+        data = open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb").read()
         encoding = inputstream.HTMLInputStream(data).charEncoding
         assert encoding[0].lower() == "big5"
-except ImportError:
-    print("chardet not found, skipping chardet tests")
diff --git a/html5lib/tests/test_parser.py b/html5lib/tests/test_parser.py
index 0bcd9787..ce117148 100644
--- a/html5lib/tests/test_parser.py
+++ b/html5lib/tests/test_parser.py
@@ -3,7 +3,6 @@
 import os
 import sys
 import traceback
-import io
 import warnings
 import re
 
@@ -11,14 +10,15 @@
 
 from .support import get_data_files
 from .support import TestData, convert, convertExpected, treeTypes
-import html5lib
-from html5lib import html5parser, treebuilders, constants
+from html5lib import html5parser, constants
 
-#Run the parse error checks
+# Run the parse error checks
 checkParseErrors = False
 
-#XXX - There should just be one function here but for some reason the testcase
-#format differs from the treedump format by a single space character
+# XXX - There should just be one function here but for some reason the testcase
+# format differs from the treedump format by a single space character
+
+
 def convertTreeDump(data):
     return "\n".join(convert(3)(data).split("\n")[1:])
 
@@ -29,10 +29,10 @@ def runParserTest(innerHTML, input, expected, errors, treeClass,
                   namespaceHTMLElements):
     warnings.resetwarnings()
     warnings.simplefilter("error")
-    #XXX - move this out into the setup function
-    #concatenate all consecutive character tokens into a single token
+    # XXX - move this out into the setup function
+    # concatenate all consecutive character tokens into a single token
     try:
-        p = html5parser.HTMLParser(tree = treeClass,
+        p = html5parser.HTMLParser(tree=treeClass,
                                    namespaceHTMLElements=namespaceHTMLElements)
     except constants.DataLossWarning:
         return
@@ -47,7 +47,7 @@ def runParserTest(innerHTML, input, expected, errors, treeClass,
                 return
     except:
         errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
-                               "\nTraceback:", traceback.format_exc()])
+                              "\nTraceback:", traceback.format_exc()])
         assert False, errorMsg
 
     output = convertTreeDump(p.tree.testSerializer(document))
@@ -57,7 +57,7 @@ def runParserTest(innerHTML, input, expected, errors, treeClass,
         expected = namespaceExpected(r"\1<html \2>", expected)
 
     errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
-                           "\nReceived:", output])
+                          "\nReceived:", output])
     assert expected == output, errorMsg
 
     errStr = []
@@ -67,17 +67,18 @@ def runParserTest(innerHTML, input, expected, errors, treeClass,
                                                constants.E[errorcode] % datavars))
 
     errorMsg2 = "\n".join(["\n\nInput:", input,
-                            "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
-                            "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
+                           "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
+                           "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
     if checkParseErrors:
             assert len(p.errors) == len(errors), errorMsg2
 
+
 def test_parser():
-    sys.stderr.write('Testing tree builders '+ " ".join(list(treeTypes.keys())) + "\n")
+    sys.stderr.write('Testing tree builders ' + " ".join(list(treeTypes.keys())) + "\n")
     files = get_data_files('tree-construction')
 
     for filename in files:
-        testName = os.path.basename(filename).replace(".dat","")
+        testName = os.path.basename(filename).replace(".dat", "")
         if testName in ("main-element", "template"):
             continue
 
@@ -85,9 +86,9 @@ def test_parser():
 
         for index, test in enumerate(tests):
             input, errors, innerHTML, expected = [test[key] for key in
-                                                      ('data', 'errors',
-                                                      'document-fragment',
-                                                      'document')]
+                                                  ('data', 'errors',
+                                                   'document-fragment',
+                                                   'document')]
             if errors:
                 errors = errors.split("\n")
 
diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
index 048f41dc..1045bcdc 100755
--- a/html5lib/tests/test_parser2.py
+++ b/html5lib/tests/test_parser2.py
@@ -2,7 +2,7 @@
 
 import io
 
-from . import support
+from . import support  # flake8: noqa
 from html5lib import html5parser
 from html5lib.constants import namespaces
 from html5lib.treebuilders import dom
@@ -10,34 +10,38 @@
 import unittest
 
 # tests that aren't autogenerated from text files
+
+
 class MoreParserTests(unittest.TestCase):
 
-  def test_assertDoctypeCloneable(self):
-    parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
-    doc = parser.parse('<!DOCTYPE HTML>')
-    self.assert_(doc.cloneNode(True))
+    def test_assertDoctypeCloneable(self):
+        parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
+        doc = parser.parse('<!DOCTYPE HTML>')
+        self.assert_(doc.cloneNode(True))
 
-  def test_line_counter(self):
-    # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
-    parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
-    parser.parse("<pre>\nx\n&gt;\n</pre>")
+    def test_line_counter(self):
+        # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
+        parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
+        parser.parse("<pre>\nx\n&gt;\n</pre>")
 
-  def test_namespace_html_elements_0(self):
-    parser = html5parser.HTMLParser(namespaceHTMLElements=True)
-    doc = parser.parse("<html></html>")
-    self.assert_(doc.childNodes[0].namespace == namespaces["html"])
+    def test_namespace_html_elements_0(self):
+        parser = html5parser.HTMLParser(namespaceHTMLElements=True)
+        doc = parser.parse("<html></html>")
+        self.assert_(doc.childNodes[0].namespace == namespaces["html"])
 
-  def test_namespace_html_elements_1(self):
-    parser = html5parser.HTMLParser(namespaceHTMLElements=False)
-    doc = parser.parse("<html></html>")
-    self.assert_(doc.childNodes[0].namespace == None)
+    def test_namespace_html_elements_1(self):
+        parser = html5parser.HTMLParser(namespaceHTMLElements=False)
+        doc = parser.parse("<html></html>")
+        self.assert_(doc.childNodes[0].namespace == None)
+
+    def test_unicode_file(self):
+        parser = html5parser.HTMLParser()
+        doc = parser.parse(io.StringIO("a"))
 
-  def test_unicode_file(self):
-    parser = html5parser.HTMLParser()
-    doc = parser.parse(io.StringIO("a"))
 
 def buildTestSuite():
-  return unittest.defaultTestLoader.loadTestsFromName(__name__)
+    return unittest.defaultTestLoader.loadTestsFromName(__name__)
+
 
 def main():
     buildTestSuite()
diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py
index 2da80d39..5bd083fe 100644
--- a/html5lib/tests/test_sanitizer.py
+++ b/html5lib/tests/test_sanitizer.py
@@ -1,9 +1,5 @@
 from __future__ import absolute_import, division, unicode_literals
 
-import os
-import sys
-import unittest
-
 try:
     import json
 except ImportError:
@@ -11,68 +7,74 @@
 
 from html5lib import html5parser, sanitizer, constants
 
+
 def runSanitizerTest(name, expected, input):
     expected = ''.join([token.toxml() for token in html5parser.HTMLParser().
-                         parseFragment(expected).childNodes])
+                        parseFragment(expected).childNodes])
     expected = json.loads(json.dumps(expected))
     assert expected == sanitize_html(input)
 
+
 def sanitize_html(stream):
     return ''.join([token.toxml() for token in
                     html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
-                     parseFragment(stream).childNodes])
+                    parseFragment(stream).childNodes])
+
 
 def test_should_handle_astral_plane_characters():
     assert "<p>\U0001d4b5 \U0001d538</p>" == sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
 
+
 def test_sanitizer():
     for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
         if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']:
-            continue ### TODO
+            continue  # TODO
         if tag_name != tag_name.lower():
-            continue ### TODO
+            continue  # TODO
         if tag_name == 'image':
             yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
-              "<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
-              "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+                   "<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
+                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
         elif tag_name == 'br':
             yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
-              "<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
-              "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+                   "<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
+                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
         elif tag_name in constants.voidElements:
             yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
-              "<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
-              "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+                   "<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
+                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
         else:
             yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
-              "<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name,tag_name),
-              "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+                   "<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
+                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
 
     for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
         tag_name = tag_name.upper()
         yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name,
-          "&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;" % (tag_name,tag_name),
-          "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+               "&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;" % (tag_name, tag_name),
+               "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
 
     for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
-        if attribute_name != attribute_name.lower(): continue ### TODO
-        if attribute_name == 'style': continue
+        if attribute_name != attribute_name.lower():
+            continue  # TODO
+        if attribute_name == 'style':
+            continue
         yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
-          "<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % attribute_name,
-          "<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name)
+               "<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % attribute_name,
+               "<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name)
 
     for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
         attribute_name = attribute_name.upper()
         yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name,
-          "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
-          "<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name)
+               "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
+               "<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name)
 
     for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
         yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol,
-          "<a href=\"%s\">foo</a>" % protocol,
-          """<a href="%s">foo</a>""" % protocol)
+               "<a href=\"%s\">foo</a>" % protocol,
+               """<a href="%s">foo</a>""" % protocol)
 
     for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
         yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
-          "<a href=\"%s\">foo</a>" % protocol,
-        """<a href="%s">foo</a>""" % protocol)
+               "<a href=\"%s\">foo</a>" % protocol,
+               """<a href="%s">foo</a>""" % protocol)
diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
index 21abc5ba..adebf609 100644
--- a/html5lib/tests/test_serializer.py
+++ b/html5lib/tests/test_serializer.py
@@ -1,6 +1,5 @@
 from __future__ import absolute_import, division, unicode_literals
 
-import os
 import unittest
 from .support import get_data_files
 
@@ -15,7 +14,7 @@
     unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
 
 import html5lib
-from html5lib import html5parser, serializer, constants
+from html5lib import serializer, constants
 from html5lib.treewalkers._base import TreeWalker
 
 optionals_loaded = []
@@ -28,6 +27,7 @@
 
 default_namespace = constants.namespaces["html"]
 
+
 class JsonWalker(TreeWalker):
     def __iter__(self):
         for token in self.tree:
@@ -83,8 +83,9 @@ def _convertAttrib(self, attribs):
 
 
 def serialize_html(input, options):
-    options = dict([(str(k),v) for k,v in options.items()])
-    return serializer.HTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None))
+    options = dict([(str(k), v) for k, v in options.items()])
+    return serializer.HTMLSerializer(**options).render(JsonWalker(input), options.get("encoding", None))
+
 
 def runSerializerTest(input, expected, options):
     encoding = options.get("encoding", None)
@@ -95,7 +96,7 @@ def runSerializerTest(input, expected, options):
 
     result = serialize_html(input, options)
     if len(expected) == 1:
-        assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s"%(expected[0], result, str(options))
+        assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
     elif result not in expected:
         assert False, "Expected: %s, Received: %s" % (expected, result)
 
@@ -151,27 +152,27 @@ def setUp(self):
 
         def testEntityReplacement(self):
             doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
-            tree = etree.fromstring(doc, parser = self.parser).getroottree()
+            tree = etree.fromstring(doc, parser=self.parser).getroottree()
             result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
             self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result)
 
         def testEntityXML(self):
             doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>"""
-            tree = etree.fromstring(doc, parser = self.parser).getroottree()
+            tree = etree.fromstring(doc, parser=self.parser).getroottree()
             result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
             self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""", result)
 
         def testEntityNoResolve(self):
             doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
-            tree = etree.fromstring(doc, parser = self.parser).getroottree()
+            tree = etree.fromstring(doc, parser=self.parser).getroottree()
             result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False,
                                           resolve_entities=False)
             self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result)
 
+
 def test_serializer():
     for filename in get_data_files('serializer', '*.test'):
         with open(filename) as fp:
             tests = json.load(fp)
-            test_name = os.path.basename(filename).replace('.test','')
             for index, test in enumerate(tests['tests']):
                 yield runSerializerTest, test["input"], test["expected"], test.get("options", {})
diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
index cc8035fd..52ee03e7 100755
--- a/html5lib/tests/test_stream.py
+++ b/html5lib/tests/test_stream.py
@@ -1,16 +1,20 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from . import support
-import unittest, codecs
+from . import support  # flake8: noqa
+import unittest
+import codecs
 
 from html5lib.inputstream import HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream
 
+
 class HTMLUnicodeInputStreamShortChunk(HTMLUnicodeInputStream):
     _defaultChunkSize = 2
 
+
 class HTMLBinaryInputStreamShortChunk(HTMLBinaryInputStream):
     _defaultChunkSize = 2
 
+
 class HTMLInputStreamTest(unittest.TestCase):
 
     def test_char_ascii(self):
@@ -36,7 +40,7 @@ def test_bom(self):
         self.assertEquals(stream.char(), "'")
 
     def test_utf_16(self):
-        stream = HTMLInputStream((' '*1025).encode('utf-16'))
+        stream = HTMLInputStream((' ' * 1025).encode('utf-16'))
         self.assert_(stream.charEncoding[0] in ['utf-16-le', 'utf-16-be'], stream.charEncoding)
         self.assertEquals(len(stream.charsUntil(' ', True)), 1025)
 
@@ -87,9 +91,11 @@ def test_position2(self):
         self.assertEquals(stream.char(), "d")
         self.assertEquals(stream.position(), (2, 1))
 
+
 def buildTestSuite():
     return unittest.defaultTestLoader.loadTestsFromName(__name__)
 
+
 def main():
     buildTestSuite()
     unittest.main()
diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py
index c1be14cf..2642e719 100644
--- a/html5lib/tests/test_tokenizer.py
+++ b/html5lib/tests/test_tokenizer.py
@@ -1,10 +1,6 @@
 from __future__ import absolute_import, division, unicode_literals
 
 
-
-import sys
-import os
-import io
 import warnings
 import re
 
@@ -17,6 +13,7 @@
 from html5lib.tokenizer import HTMLTokenizer
 from html5lib import constants
 
+
 class TokenizerTestParser(object):
     def __init__(self, initialState, lastStartTag=None):
         self.tokenizer = HTMLTokenizer
@@ -30,9 +27,9 @@ def parse(self, stream, encoding=None, innerHTML=False):
         tokenizer.state = getattr(tokenizer, self._state)
         if self._lastStartTag is not None:
             tokenizer.currentToken = {"type": "startTag",
-                                      "name":self._lastStartTag}
+                                      "name": self._lastStartTag}
 
-        types = dict((v,k) for k,v in constants.tokenTypes.items())
+        types = dict((v, k) for k, v in constants.tokenTypes.items())
         for token in tokenizer:
             getattr(self, 'process%s' % types[token["type"]])(token)
 
@@ -71,12 +68,13 @@ def processEOF(self, token):
     def processParseError(self, token):
         self.outputTokens.append(["ParseError", token["data"]])
 
+
 def concatenateCharacterTokens(tokens):
     outputTokens = []
     for token in tokens:
         if not "ParseError" in token and token[0] == "Character":
             if (outputTokens and not "ParseError" in outputTokens[-1] and
-                outputTokens[-1][0] == "Character"):
+                    outputTokens[-1][0] == "Character"):
                 outputTokens[-1][1] += token[1]
             else:
                 outputTokens.append(token)
@@ -84,6 +82,7 @@ def concatenateCharacterTokens(tokens):
             outputTokens.append(token)
     return outputTokens
 
+
 def normalizeTokens(tokens):
     # TODO: convert tests to reflect arrays
     for i, token in enumerate(tokens):
@@ -91,6 +90,7 @@ def normalizeTokens(tokens):
             tokens[i] = token[0]
     return tokens
 
+
 def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
                 ignoreErrors=False):
     """Test whether the test has passed or failed
@@ -98,10 +98,10 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
     If the ignoreErrorOrder flag is set to true we don't test the relative
     positions of parse errors and non parse errors
     """
-    checkSelfClosing= False
+    checkSelfClosing = False
     for token in expectedTokens:
         if (token[0] == "StartTag" and len(token) == 4
-            or token[0] == "EndTag" and len(token) == 3):
+                or token[0] == "EndTag" and len(token) == 3):
             checkSelfClosing = True
             break
 
@@ -113,10 +113,10 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
     if not ignoreErrorOrder and not ignoreErrors:
         return expectedTokens == receivedTokens
     else:
-        #Sort the tokens into two groups; non-parse errors and parse errors
-        tokens = {"expected":[[],[]], "received":[[],[]]}
+        # Sort the tokens into two groups; non-parse errors and parse errors
+        tokens = {"expected": [[], []], "received": [[], []]}
         for tokenType, tokenList in zip(list(tokens.keys()),
-                                         (expectedTokens, receivedTokens)):
+                                       (expectedTokens, receivedTokens)):
             for token in tokenList:
                 if token != "ParseError":
                     tokens[tokenType][0].append(token)
@@ -125,6 +125,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
                         tokens[tokenType][1].append(token)
         return tokens["expected"] == tokens["received"]
 
+
 def unescape(test):
     def decode(inp):
         return inp.encode("utf-8").decode("unicode-escape")
@@ -141,6 +142,7 @@ def decode(inp):
                     token[2][decode(key)] = decode(value)
     return test
 
+
 def runTokenizerTest(test):
     warnings.resetwarnings()
     warnings.simplefilter("error")
@@ -154,7 +156,7 @@ def runTokenizerTest(test):
     tokens = concatenateCharacterTokens(tokens)
     received = normalizeTokens(tokens)
     errorMsg = "\n".join(["\n\nInitial state:",
-                          test['initialState'] ,
+                          test['initialState'],
                           "\nInput:", test['input'],
                           "\nExpected:", repr(expected),
                           "\nreceived:", repr(tokens)])
@@ -162,23 +164,25 @@ def runTokenizerTest(test):
     ignoreErrorOrder = test.get('ignoreErrorOrder', False)
     assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
 
+
 def _doCapitalize(match):
     return match.group(1).upper()
 
 _capitalizeRe = re.compile(r"\W+(\w)").sub
 
+
 def capitalize(s):
     s = s.lower()
     s = _capitalizeRe(_doCapitalize, s)
     return s
 
+
 def testTokenizer():
     for filename in get_data_files('tokenizer', '*.test'):
         with open(filename) as fp:
             tests = json.load(fp)
-            testName = os.path.basename(filename).replace(".test","")
             if 'tests' in tests:
-                for index,test in enumerate(tests['tests']):
+                for index, test in enumerate(tests['tests']):
                     if 'initialStates' not in test:
                         test["initialStates"] = ["Data state"]
                     if 'doubleEscaped' in test:
diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index 566acf81..7f7853ed 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -14,7 +14,7 @@
 from .support import get_data_files, TestData, convertExpected
 
 from html5lib import html5parser, treewalkers, treebuilders, constants
-from html5lib.filters.lint import Filter as LintFilter, LintError
+
 
 def PullDOMAdapter(node):
     from xml.dom import Node
@@ -45,70 +45,62 @@ def PullDOMAdapter(node):
         raise NotImplementedError("Node type not supported: " + str(node.nodeType))
 
 treeTypes = {
-"simpletree":  {"builder": treebuilders.getTreeBuilder("simpletree"),
-                "walker":  treewalkers.getTreeWalker("simpletree")},
-"DOM":         {"builder": treebuilders.getTreeBuilder("dom"),
-                "walker":  treewalkers.getTreeWalker("dom")},
-"PullDOM":     {"builder": treebuilders.getTreeBuilder("dom"),
-                "adapter": PullDOMAdapter,
-                "walker":  treewalkers.getTreeWalker("pulldom")},
+    "simpletree": {"builder": treebuilders.getTreeBuilder("simpletree"),
+                   "walker": treewalkers.getTreeWalker("simpletree")},
+    "DOM": {"builder": treebuilders.getTreeBuilder("dom"),
+            "walker": treewalkers.getTreeWalker("dom")},
+    "PullDOM": {"builder": treebuilders.getTreeBuilder("dom"),
+                "adapter": PullDOMAdapter,
+                "walker": treewalkers.getTreeWalker("pulldom")},
 }
 
-#Try whatever etree implementations are available from a list that are
+# Try whatever etree implementations are available from a list that are
 #"supposed" to work
 try:
     import xml.etree.ElementTree as ElementTree
+except ImportError:
+    pass
+else:
     treeTypes['ElementTree'] = \
         {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
-         "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
-except ImportError:
-    try:
-        import elementtree.ElementTree as ElementTree
-        treeTypes['ElementTree'] = \
-            {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
-             "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
-    except ImportError:
-        pass
+         "walker": treewalkers.getTreeWalker("etree", ElementTree)}
 
 try:
     import xml.etree.cElementTree as ElementTree
+except ImportError:
+    pass
+else:
     treeTypes['cElementTree'] = \
         {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
-         "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
-except ImportError:
-    try:
-        import cElementTree as ElementTree
-        treeTypes['cElementTree'] = \
-            {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
-             "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
-    except ImportError:
-        pass
+         "walker": treewalkers.getTreeWalker("etree", ElementTree)}
+
 
 try:
-    import lxml.etree as ElementTree
-#    treeTypes['lxml_as_etree'] = \
-#        {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
-#         "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
-    treeTypes['lxml_native'] = \
-        {"builder": treebuilders.getTreeBuilder("lxml"),
-         "walker":  treewalkers.getTreeWalker("lxml")}
+    import lxml.etree as ElementTree  # flake8: noqa
 except ImportError:
     pass
+else:
+    treeTypes['lxml_native'] = \
+        {"builder": treebuilders.getTreeBuilder("lxml"),
+         "walker": treewalkers.getTreeWalker("lxml")}
 
-#Try whatever etree implementations are available from a list that are
+
+# Try whatever etree implementations are available from a list that are
 #"supposed" to work
 try:
     import pxdom
     treeTypes['pxdom'] = \
         {"builder": treebuilders.getTreeBuilder("dom", pxdom),
-         "walker":  treewalkers.getTreeWalker("dom")}
+         "walker": treewalkers.getTreeWalker("dom")}
 except ImportError:
     pass
 
 try:
     from genshi.core import QName, Attrs
     from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
-
+except ImportError:
+    pass
+else:
     def GenshiAdapter(tree):
         text = None
         for token in treewalkers.getTreeWalker("simpletree")(tree):
@@ -149,17 +141,16 @@ def GenshiAdapter(tree):
                                 token["systemId"]), (None, -1, -1)
 
             else:
-                pass # FIXME: What to do?
+                pass  # FIXME: What to do?
 
         if text is not None:
             yield TEXT, text, (None, -1, -1)
 
     treeTypes["genshi"] = \
-       {"builder": treebuilders.getTreeBuilder("simpletree"),
-        "adapter": GenshiAdapter,
-        "walker":  treewalkers.getTreeWalker("genshi")}
-except ImportError:
-    pass
+        {"builder": treebuilders.getTreeBuilder("simpletree"),
+         "adapter": GenshiAdapter,
+         "walker": treewalkers.getTreeWalker("genshi")}
+
 
 def concatenateCharacterTokens(tokens):
     charactersToken = None
@@ -178,6 +169,7 @@ def concatenateCharacterTokens(tokens):
     if charactersToken is not None:
         yield charactersToken
 
+
 def convertTokens(tokens):
     output = []
     indent = 0
@@ -185,7 +177,7 @@ def convertTokens(tokens):
         type = token["type"]
         if type in ("StartTag", "EmptyTag"):
             if (token["namespace"] and
-                token["namespace"] != constants.namespaces["html"]):
+                    token["namespace"] != constants.namespaces["html"]):
                 if token["namespace"] in constants.prefixes:
                     name = constants.prefixes[token["namespace"]]
                 else:
@@ -193,12 +185,12 @@ def convertTokens(tokens):
                 name += " " + token["name"]
             else:
                 name = token["name"]
-            output.append("%s<%s>" % (" "*indent, name))
+            output.append("%s<%s>" % (" " * indent, name))
             indent += 2
             attrs = token["data"]
             if attrs:
-                #TODO: Remove this if statement, attrs should always exist
-                for (namespace,name),value in sorted(attrs.items()):
+                # TODO: Remove this if statement, attrs should always exist
+                for (namespace, name), value in sorted(attrs.items()):
                     if namespace:
                         if namespace in constants.prefixes:
                             outputname = constants.prefixes[namespace]
@@ -207,41 +199,43 @@ def convertTokens(tokens):
                         outputname += " " + name
                     else:
                         outputname = name
-                    output.append("%s%s=\"%s\"" % (" "*indent, outputname, value))
+                    output.append("%s%s=\"%s\"" % (" " * indent, outputname, value))
             if type == "EmptyTag":
                 indent -= 2
         elif type == "EndTag":
             indent -= 2
         elif type == "Comment":
-            output.append("%s<!-- %s -->" % (" "*indent, token["data"]))
+            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
         elif type == "Doctype":
             if token["name"]:
                 if token["publicId"]:
-                    output.append("""%s<!DOCTYPE %s "%s" "%s">"""%
-                                  (" "*indent, token["name"],
+                    output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
+                                  (" " * indent, token["name"],
                                    token["publicId"],
                                    token["systemId"] and token["systemId"] or ""))
                 elif token["systemId"]:
-                    output.append("""%s<!DOCTYPE %s "" "%s">"""%
-                                  (" "*indent, token["name"],
+                    output.append("""%s<!DOCTYPE %s "" "%s">""" %
+                                  (" " * indent, token["name"],
                                    token["systemId"]))
                 else:
-                    output.append("%s<!DOCTYPE %s>"%(" "*indent,
-                                                     token["name"]))
+                    output.append("%s<!DOCTYPE %s>" % (" " * indent,
+                                                       token["name"]))
             else:
-                output.append("%s<!DOCTYPE >" % (" "*indent,))
+                output.append("%s<!DOCTYPE >" % (" " * indent,))
         elif type in ("Characters", "SpaceCharacters"):
-            output.append("%s\"%s\"" % (" "*indent, token["data"]))
+            output.append("%s\"%s\"" % (" " * indent, token["data"]))
         else:
-            pass # TODO: what to do with errors?
+            pass  # TODO: what to do with errors?
     return "\n".join(output)
 
 import re
-attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+",re.M)
+attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
+
+
 def sortattrs(x):
-  lines = x.group(0).split("\n")
-  lines.sort()
-  return "\n".join(lines)
+    lines = x.group(0).split("\n")
+    lines.sort()
+    return "\n".join(lines)
 
 
 class TokenTestCase(unittest.TestCase):
@@ -258,26 +252,27 @@ def test_all_tokens(self):
             {'data': 'c', 'type': 'Characters'},
             {'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
             {'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
-            ]
+        ]
         for treeName, treeCls in treeTypes.items():
-            p = html5parser.HTMLParser(tree = treeCls["builder"])
+            p = html5parser.HTMLParser(tree=treeCls["builder"])
             document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
             document = treeCls.get("adapter", lambda x: x)(document)
             output = treeCls["walker"](document)
             for expectedToken, outputToken in zip(expected, output):
                 self.assertEqual(expectedToken, outputToken)
 
+
 def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
     warnings.resetwarnings()
     warnings.simplefilter("error")
     try:
-        p = html5parser.HTMLParser(tree = treeClass["builder"])
+        p = html5parser.HTMLParser(tree=treeClass["builder"])
         if innerHTML:
             document = p.parseFragment(input, innerHTML)
         else:
             document = p.parse(input)
     except constants.DataLossWarning:
-        #Ignore testcases we know we don't pass
+        # Ignore testcases we know we don't pass
         return
 
     document = treeClass.get("adapter", lambda x: x)(document)
@@ -289,21 +284,22 @@ def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
                                     [line + "\n" for line in output.splitlines()],
                                     "Expected", "Received"))
         assert expected == output, "\n".join([
-                "", "Input:", input,
+            "", "Input:", input,
                 "", "Expected:", expected,
                 "", "Received:", output,
                 "", "Diff:", diff,
-                ])
+        ])
     except NotImplementedError:
-        pass # Amnesty for those that confess...
+        pass  # Amnesty for those that confess...
+
 
 def test_treewalker():
-    sys.stdout.write('Testing tree walkers '+ " ".join(list(treeTypes.keys())) + "\n")
+    sys.stdout.write('Testing tree walkers ' + " ".join(list(treeTypes.keys())) + "\n")
 
     for treeName, treeCls in treeTypes.items():
         files = get_data_files('tree-construction')
         for filename in files:
-            testName = os.path.basename(filename).replace(".dat","")
+            testName = os.path.basename(filename).replace(".dat", "")
             if testName in ("main-element", "template"):
                 continue
 
@@ -316,5 +312,3 @@ def test_treewalker():
                                                                "document")]
                 errors = errors.split("\n")
                 yield runTreewalkerTest, innerHTML, input, expected, errors, treeCls
-
-
diff --git a/html5lib/tests/test_whitespace_filter.py b/html5lib/tests/test_whitespace_filter.py
index 9525d41b..9ed27fd6 100644
--- a/html5lib/tests/test_whitespace_filter.py
+++ b/html5lib/tests/test_whitespace_filter.py
@@ -11,6 +11,7 @@
 except AttributeError:
     unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
 
+
 class TestCase(unittest.TestCase):
     def runTest(self, input, expected):
         output = list(Filter(input))
@@ -24,19 +25,19 @@ def runTestUnmodifiedOutput(self, input):
 
     def testPhrasingElements(self):
         self.runTestUnmodifiedOutput(
-            [{"type": "Characters", "data": "This is a " },
-             {"type": "StartTag", "name": "span", "data": [] },
-             {"type": "Characters", "data": "phrase" },
+            [{"type": "Characters", "data": "This is a "},
+             {"type": "StartTag", "name": "span", "data": []},
+             {"type": "Characters", "data": "phrase"},
              {"type": "EndTag", "name": "span", "data": []},
-             {"type": "SpaceCharacters", "data": " " },
-             {"type": "Characters", "data": "with" },
-             {"type": "SpaceCharacters", "data": " " },
-             {"type": "StartTag", "name": "em", "data": [] },
-             {"type": "Characters", "data": "emphasised text" },
+             {"type": "SpaceCharacters", "data": " "},
+             {"type": "Characters", "data": "with"},
+             {"type": "SpaceCharacters", "data": " "},
+             {"type": "StartTag", "name": "em", "data": []},
+             {"type": "Characters", "data": "emphasised text"},
              {"type": "EndTag", "name": "em", "data": []},
-             {"type": "Characters", "data": " and an " },
-             {"type": "StartTag", "name": "img", "data": [["alt", "image"]] },
-             {"type": "Characters", "data": "." }])
+             {"type": "Characters", "data": " and an "},
+             {"type": "StartTag", "name": "img", "data": [["alt", "image"]]},
+             {"type": "Characters", "data": "."}])
 
     def testLeadingWhitespace(self):
         self.runTest(
@@ -119,9 +120,11 @@ def testWhitespaceInPre(self):
              {"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
              {"type": "EndTag", "name": "pre", "data": []}])
 
+
 def buildTestSuite():
     return unittest.defaultTestLoader.loadTestsFromName(__name__)
 
+
 def main():
     buildTestSuite()
     unittest.main()
diff --git a/html5lib/tests/tokenizertotree.py b/html5lib/tests/tokenizertotree.py
index ddcaa69f..b841c76c 100644
--- a/html5lib/tests/tokenizertotree.py
+++ b/html5lib/tests/tokenizertotree.py
@@ -7,29 +7,30 @@
 
 import html5lib
 from . import support
-from . import test_parser
 from . import test_tokenizer
 
 p = html5lib.HTMLParser()
 
 unnamespaceExpected = re.compile(r"^(\|\s*)<html ([^>]+)>", re.M).sub
 
+
 def main(out_path):
     if not os.path.exists(out_path):
-        sys.stderr.write("Path %s does not exist"%out_path)
+        sys.stderr.write("Path %s does not exist" % out_path)
         sys.exit(1)
 
     for filename in support.get_data_files('tokenizer', '*.test'):
         run_file(filename, out_path)
 
+
 def run_file(filename, out_path):
     try:
-        tests_data = json.load(file(filename))
+        tests_data = json.load(open(filename, "r"))
     except ValueError:
-        sys.stderr.write("Failed to load %s\n"%filename)
+        sys.stderr.write("Failed to load %s\n" % filename)
         return
     name = os.path.splitext(os.path.split(filename)[1])[0]
-    output_file = open(os.path.join(out_path, "tokenizer_%s.dat"%name), "w")
+    output_file = open(os.path.join(out_path, "tokenizer_%s.dat" % name), "w")
 
     if 'tests' in tests_data:
         for test_data in tests_data['tests']:
@@ -38,13 +39,14 @@ def run_file(filename, out_path):
 
             for initial_state in test_data["initialStates"]:
                 if initial_state != "Data state":
-                    #don't support this yet
+                    # don't support this yet
                     continue
                 test = make_test(test_data)
                 output_file.write(test)
 
     output_file.close()
 
+
 def make_test(test_data):
     if 'doubleEscaped' in test_data:
         test_data = test_tokenizer.unescape_test(test_data)
@@ -55,8 +57,8 @@ def make_test(test_data):
     rv.append("#errors")
     tree = p.parse(test_data["input"])
     output = p.tree.testSerializer(tree)
-    output  = "\n".join(("| "+ line[3:]) if line.startswith("|  ") else line
-                        for line in output.split("\n"))
+    output = "\n".join(("| " + line[3:]) if line.startswith("|  ") else line
+                       for line in output.split("\n"))
     output = unnamespaceExpected(r"\1<\2>", output)
     rv.append(output.encode("utf8"))
     rv.append("")
diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py
index 72d3057a..c49eee0d 100644
--- a/html5lib/tokenizer.py
+++ b/html5lib/tokenizer.py
@@ -1,15 +1,15 @@
 from __future__ import absolute_import, division, unicode_literals
 
 try:
-    chr = unichr
+    chr = unichr  # flake8: noqa
 except NameError:
     pass
 
 from collections import deque
 
 from .constants import spaceCharacters
-from .constants import entitiesWindows1252, entities
-from .constants import asciiLowercase, asciiLetters, asciiUpper2Lower
+from .constants import entities
+from .constants import asciiLetters, asciiUpper2Lower
 from .constants import digits, hexDigits, EOF
 from .constants import tokenTypes, tagTokenTypes
 from .constants import replacementCharacters
@@ -20,6 +20,7 @@
 
 entitiesTrie = Trie(entities)
 
+
 class HTMLTokenizer(object):
     """ This class takes care of tokenizing HTML.
 
@@ -39,7 +40,7 @@ def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
         self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet)
         self.parser = parser
 
-        #Perform case conversions?
+        # Perform case conversions?
         self.lowercaseElementName = lowercaseElementName
         self.lowercaseAttrName = lowercaseAttrName
 
@@ -97,20 +98,20 @@ def consumeNumberEntity(self, isHex):
         if charAsInt in replacementCharacters:
             char = replacementCharacters[charAsInt]
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "illegal-codepoint-for-numeric-entity",
-              "datavars": {"charAsInt": charAsInt}})
+                                    "illegal-codepoint-for-numeric-entity",
+                                    "datavars": {"charAsInt": charAsInt}})
         elif ((0xD800 <= charAsInt <= 0xDFFF) or
               (charAsInt > 0x10FFFF)):
             char = "\uFFFD"
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "illegal-codepoint-for-numeric-entity",
-              "datavars": {"charAsInt": charAsInt}})
+                                    "illegal-codepoint-for-numeric-entity",
+                                    "datavars": {"charAsInt": charAsInt}})
         else:
-            #Should speed up this check somehow (e.g. move the set to a constant)
+            # Should speed up this check somehow (e.g. move the set to a constant)
             if ((0x0001 <= charAsInt <= 0x0008) or
                 (0x000E <= charAsInt <= 0x001F) or
-                (0x007F  <= charAsInt <= 0x009F) or
-                (0xFDD0  <= charAsInt <= 0xFDEF) or
+                (0x007F <= charAsInt <= 0x009F) or
+                (0xFDD0 <= charAsInt <= 0xFDEF) or
                 charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE,
                                         0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
                                         0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
@@ -122,7 +123,7 @@ def consumeNumberEntity(self, isHex):
                                         0xFFFFF, 0x10FFFE, 0x10FFFF])):
                 self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                         "data":
-                                            "illegal-codepoint-for-numeric-entity",
+                                        "illegal-codepoint-for-numeric-entity",
                                         "datavars": {"charAsInt": charAsInt}})
             try:
                 # Try/except needed as UCS-2 Python builds' unichar only works
@@ -136,7 +137,7 @@ def consumeNumberEntity(self, isHex):
         # invoke parseError on parser.
         if c != ";":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "numeric-entity-without-semicolon"})
+                                    "numeric-entity-without-semicolon"})
             self.stream.unget(c)
 
         return char
@@ -147,7 +148,7 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False):
 
         charStack = [self.stream.char()]
         if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&")
-            or (allowedChar is not None and allowedChar == charStack[0])):
+                or (allowedChar is not None and allowedChar == charStack[0])):
             self.stream.unget(charStack[0])
 
         elif charStack[0] == "#":
@@ -160,14 +161,14 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False):
 
             # charStack[-1] should be the first digit
             if (hex and charStack[-1] in hexDigits) \
-             or (not hex and charStack[-1] in digits):
+                    or (not hex and charStack[-1] in digits):
                 # At least one digit found, so consume the whole number
                 self.stream.unget(charStack[-1])
                 output = self.consumeNumberEntity(hex)
             else:
                 # No digits found
                 self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                    "data": "expected-numeric-entity"})
+                                        "data": "expected-numeric-entity"})
                 self.stream.unget(charStack.pop())
                 output = "&" + "".join(charStack)
 
@@ -195,11 +196,11 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False):
             if entityName is not None:
                 if entityName[-1] != ";":
                     self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                      "named-entity-without-semicolon"})
+                                            "named-entity-without-semicolon"})
                 if (entityName[-1] != ";" and fromAttribute and
                     (charStack[entityLength] in asciiLetters or
                      charStack[entityLength] in digits or
-                    charStack[entityLength] == "=")):
+                     charStack[entityLength] == "=")):
                     self.stream.unget(charStack.pop())
                     output = "&" + "".join(charStack)
                 else:
@@ -208,7 +209,7 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False):
                     output += "".join(charStack[entityLength:])
             else:
                 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                  "expected-named-entity"})
+                                        "expected-named-entity"})
                 self.stream.unget(charStack.pop())
                 output = "&" + "".join(charStack)
 
@@ -238,17 +239,15 @@ def emitCurrentToken(self):
                 token["name"] = token["name"].translate(asciiUpper2Lower)
             if token["type"] == tokenTypes["EndTag"]:
                 if token["data"]:
-                    self.tokenQueue.append({"type":tokenTypes["ParseError"],
-                                            "data":"attributes-in-end-tag"})
+                    self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                            "data": "attributes-in-end-tag"})
                 if token["selfClosing"]:
-                    self.tokenQueue.append({"type":tokenTypes["ParseError"],
-                                            "data":"self-closing-flag-on-end-tag"})
+                    self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                            "data": "self-closing-flag-on-end-tag"})
         self.tokenQueue.append(token)
         self.state = self.dataState
 
-
     # Below are the various tokenizer states worked out.
-
     def dataState(self):
         data = self.stream.char()
         if data == "&":
@@ -257,7 +256,7 @@ def dataState(self):
             self.state = self.tagOpenState
         elif data == "\u0000":
             self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data":"invalid-codepoint"})
+                                    "data": "invalid-codepoint"})
             self.tokenQueue.append({"type": tokenTypes["Characters"],
                                     "data": "\u0000"})
         elif data is EOF:
@@ -268,14 +267,14 @@ def dataState(self):
             # state". At that point spaceCharacters are important so they are
             # emitted separately.
             self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
-              data + self.stream.charsUntil(spaceCharacters, True)})
+                                    data + self.stream.charsUntil(spaceCharacters, True)})
             # No need to update lastFourChars here, since the first space will
             # have already been appended to lastFourChars and will have broken
             # any <!-- or --> sequences
         else:
             chars = self.stream.charsUntil(("&", "<", "\u0000"))
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-              data + chars})
+                                    data + chars})
         return True
 
     def entityDataState(self):
@@ -302,14 +301,14 @@ def rcdataState(self):
             # state". At that point spaceCharacters are important so they are
             # emitted separately.
             self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
-              data + self.stream.charsUntil(spaceCharacters, True)})
+                                    data + self.stream.charsUntil(spaceCharacters, True)})
             # No need to update lastFourChars here, since the first space will
             # have already been appended to lastFourChars and will have broken
             # any <!-- or --> sequences
         else:
             chars = self.stream.charsUntil(("&", "<"))
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-              data + chars})
+                                    data + chars})
         return True
 
     def characterReferenceInRcdata(self):
@@ -332,7 +331,7 @@ def rawtextState(self):
         else:
             chars = self.stream.charsUntil(("<", "\u0000"))
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-              data + chars})
+                                    data + chars})
         return True
 
     def scriptDataState(self):
@@ -350,7 +349,7 @@ def scriptDataState(self):
         else:
             chars = self.stream.charsUntil(("<", "\u0000"))
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-              data + chars})
+                                    data + chars})
         return True
 
     def plaintextState(self):
@@ -384,20 +383,20 @@ def tagOpenState(self):
             # XXX In theory it could be something besides a tag name. But
             # do we really care?
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-tag-name-but-got-right-bracket"})
+                                    "expected-tag-name-but-got-right-bracket"})
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"})
             self.state = self.dataState
         elif data == "?":
             # XXX In theory it could be something besides a tag name. But
             # do we really care?
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-tag-name-but-got-question-mark"})
+                                    "expected-tag-name-but-got-question-mark"})
             self.stream.unget(data)
             self.state = self.bogusCommentState
         else:
             # XXX
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-tag-name"})
+                                    "expected-tag-name"})
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
             self.stream.unget(data)
             self.state = self.dataState
@@ -407,22 +406,22 @@ def closeTagOpenState(self):
         data = self.stream.char()
         if data in asciiLetters:
             self.currentToken = {"type": tokenTypes["EndTag"], "name": data,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.tagNameState
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-closing-tag-but-got-right-bracket"})
+                                    "expected-closing-tag-but-got-right-bracket"})
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-closing-tag-but-got-eof"})
+                                    "expected-closing-tag-but-got-eof"})
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
             self.state = self.dataState
         else:
             # XXX data can be _'_...
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-closing-tag-but-got-char",
-              "datavars": {"data": data}})
+                                    "expected-closing-tag-but-got-char",
+                                    "datavars": {"data": data}})
             self.stream.unget(data)
             self.state = self.bogusCommentState
         return True
@@ -435,7 +434,7 @@ def tagNameState(self):
             self.emitCurrentToken()
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-tag-name"})
+                                    "eof-in-tag-name"})
             self.state = self.dataState
         elif data == "/":
             self.state = self.selfClosingStartTagState
@@ -477,17 +476,17 @@ def rcdataEndTagNameState(self):
         if data in spaceCharacters and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.beforeAttributeNameState
         elif data == "/" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.selfClosingStartTagState
         elif data == ">" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.emitCurrentToken()
             self.state = self.dataState
         elif data in asciiLetters:
@@ -527,17 +526,17 @@ def rawtextEndTagNameState(self):
         if data in spaceCharacters and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.beforeAttributeNameState
         elif data == "/" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.selfClosingStartTagState
         elif data == ">" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.emitCurrentToken()
             self.state = self.dataState
         elif data in asciiLetters:
@@ -580,17 +579,17 @@ def scriptDataEndTagNameState(self):
         if data in spaceCharacters and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.beforeAttributeNameState
         elif data == "/" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.selfClosingStartTagState
         elif data == ">" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.emitCurrentToken()
             self.state = self.dataState
         elif data in asciiLetters:
@@ -639,7 +638,7 @@ def scriptDataEscapedState(self):
         else:
             chars = self.stream.charsUntil(("<", "-", "\u0000"))
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-              data + chars})
+                                    data + chars})
         return True
 
     def scriptDataEscapedDashState(self):
@@ -716,17 +715,17 @@ def scriptDataEscapedEndTagNameState(self):
         if data in spaceCharacters and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.beforeAttributeNameState
         elif data == "/" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.selfClosingStartTagState
         elif data == ">" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.emitCurrentToken()
             self.state = self.dataState
         elif data in asciiLetters:
@@ -769,7 +768,7 @@ def scriptDataDoubleEscapedState(self):
                                     "data": "\uFFFD"})
         elif data == EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-script-in-script"})
+                                    "eof-in-script-in-script"})
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
@@ -791,14 +790,14 @@ def scriptDataDoubleEscapedDashState(self):
             self.state = self.scriptDataDoubleEscapedState
         elif data == EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-script-in-script"})
+                                    "eof-in-script-in-script"})
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
             self.state = self.scriptDataDoubleEscapedState
         return True
 
-    def scriptDataDoubleEscapedDashState(self):
+    def scriptDataDoubleEscapedDashDashState(self):
         data = self.stream.char()
         if data == "-":
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
@@ -816,7 +815,7 @@ def scriptDataDoubleEscapedDashState(self):
             self.state = self.scriptDataDoubleEscapedState
         elif data == EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-script-in-script"})
+                                    "eof-in-script-in-script"})
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
@@ -863,7 +862,7 @@ def beforeAttributeNameState(self):
             self.state = self.selfClosingStartTagState
         elif data in ("'", '"', "=", "<"):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "invalid-character-in-attribute-name"})
+                                    "invalid-character-in-attribute-name"})
             self.currentToken["data"].append([data, ""])
             self.state = self.attributeNameState
         elif data == "\u0000":
@@ -873,7 +872,7 @@ def beforeAttributeNameState(self):
             self.state = self.attributeNameState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-attribute-name-but-got-eof"})
+                                    "expected-attribute-name-but-got-eof"})
             self.state = self.dataState
         else:
             self.currentToken["data"].append([data, ""])
@@ -888,7 +887,7 @@ def attributeNameState(self):
             self.state = self.beforeAttributeValueState
         elif data in asciiLetters:
             self.currentToken["data"][-1][0] += data +\
-              self.stream.charsUntil(asciiLetters, True)
+                self.stream.charsUntil(asciiLetters, True)
             leavingThisState = False
         elif data == ">":
             # XXX If we emit here the attributes are converted to a dict
@@ -907,7 +906,7 @@ def attributeNameState(self):
         elif data in ("'", '"', "<"):
             self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                     "data":
-                                        "invalid-character-in-attribute-name"})
+                                    "invalid-character-in-attribute-name"})
             self.currentToken["data"][-1][0] += data
             leavingThisState = False
         elif data is EOF:
@@ -928,7 +927,7 @@ def attributeNameState(self):
             for name, value in self.currentToken["data"][:-1]:
                 if self.currentToken["data"][-1][0] == name:
                     self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                      "duplicate-attribute"})
+                                            "duplicate-attribute"})
                     break
             # XXX Fix for above XXX
             if emitToken:
@@ -955,12 +954,12 @@ def afterAttributeNameState(self):
             self.state = self.attributeNameState
         elif data in ("'", '"', "<"):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "invalid-character-after-attribute-name"})
+                                    "invalid-character-after-attribute-name"})
             self.currentToken["data"].append([data, ""])
             self.state = self.attributeNameState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-end-of-tag-but-got-eof"})
+                                    "expected-end-of-tag-but-got-eof"})
             self.state = self.dataState
         else:
             self.currentToken["data"].append([data, ""])
@@ -975,12 +974,12 @@ def beforeAttributeValueState(self):
             self.state = self.attributeValueDoubleQuotedState
         elif data == "&":
             self.state = self.attributeValueUnQuotedState
-            self.stream.unget(data);
+            self.stream.unget(data)
         elif data == "'":
             self.state = self.attributeValueSingleQuotedState
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-attribute-value-but-got-right-bracket"})
+                                    "expected-attribute-value-but-got-right-bracket"})
             self.emitCurrentToken()
         elif data == "\u0000":
             self.tokenQueue.append({"type": tokenTypes["ParseError"],
@@ -989,12 +988,12 @@ def beforeAttributeValueState(self):
             self.state = self.attributeValueUnQuotedState
         elif data in ("=", "<", "`"):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "equals-in-unquoted-attribute-value"})
+                                    "equals-in-unquoted-attribute-value"})
             self.currentToken["data"][-1][1] += data
             self.state = self.attributeValueUnQuotedState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-attribute-value-but-got-eof"})
+                                    "expected-attribute-value-but-got-eof"})
             self.state = self.dataState
         else:
             self.currentToken["data"][-1][1] += data
@@ -1013,11 +1012,11 @@ def attributeValueDoubleQuotedState(self):
             self.currentToken["data"][-1][1] += "\uFFFD"
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-attribute-value-double-quote"})
+                                    "eof-in-attribute-value-double-quote"})
             self.state = self.dataState
         else:
             self.currentToken["data"][-1][1] += data +\
-              self.stream.charsUntil(("\"", "&"))
+                self.stream.charsUntil(("\"", "&"))
         return True
 
     def attributeValueSingleQuotedState(self):
@@ -1032,11 +1031,11 @@ def attributeValueSingleQuotedState(self):
             self.currentToken["data"][-1][1] += "\uFFFD"
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-attribute-value-single-quote"})
+                                    "eof-in-attribute-value-single-quote"})
             self.state = self.dataState
         else:
             self.currentToken["data"][-1][1] += data +\
-              self.stream.charsUntil(("'", "&"))
+                self.stream.charsUntil(("'", "&"))
         return True
 
     def attributeValueUnQuotedState(self):
@@ -1049,7 +1048,7 @@ def attributeValueUnQuotedState(self):
             self.emitCurrentToken()
         elif data in ('"', "'", "=", "<", "`"):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-character-in-unquoted-attribute-value"})
+                                    "unexpected-character-in-unquoted-attribute-value"})
             self.currentToken["data"][-1][1] += data
         elif data == "\u0000":
             self.tokenQueue.append({"type": tokenTypes["ParseError"],
@@ -1057,11 +1056,11 @@ def attributeValueUnQuotedState(self):
             self.currentToken["data"][-1][1] += "\uFFFD"
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-attribute-value-no-quotes"})
+                                    "eof-in-attribute-value-no-quotes"})
             self.state = self.dataState
         else:
             self.currentToken["data"][-1][1] += data + self.stream.charsUntil(
-              frozenset(("&", ">", '"', "'", "=", "<", "`")) | spaceCharacters)
+                frozenset(("&", ">", '"', "'", "=", "<", "`")) | spaceCharacters)
         return True
 
     def afterAttributeValueState(self):
@@ -1074,12 +1073,12 @@ def afterAttributeValueState(self):
             self.state = self.selfClosingStartTagState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-EOF-after-attribute-value"})
+                                    "unexpected-EOF-after-attribute-value"})
             self.stream.unget(data)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-character-after-attribute-value"})
+                                    "unexpected-character-after-attribute-value"})
             self.stream.unget(data)
             self.state = self.beforeAttributeNameState
         return True
@@ -1092,12 +1091,12 @@ def selfClosingStartTagState(self):
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                     "data":
-                                        "unexpected-EOF-after-solidus-in-tag"})
+                                    "unexpected-EOF-after-solidus-in-tag"})
             self.stream.unget(data)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-character-after-solidus-in-tag"})
+                                    "unexpected-character-after-solidus-in-tag"})
             self.stream.unget(data)
             self.state = self.beforeAttributeNameState
         return True
@@ -1109,7 +1108,7 @@ def bogusCommentState(self):
         data = self.stream.charsUntil(">")
         data = data.replace("\u0000", "\uFFFD")
         self.tokenQueue.append(
-          {"type": tokenTypes["Comment"], "data": data})
+            {"type": tokenTypes["Comment"], "data": data})
 
         # Eat the character directly after the bogus comment which is either a
         # ">" or an EOF.
@@ -1155,7 +1154,7 @@ def markupDeclarationOpenState(self):
                 return True
 
         self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-          "expected-dashes-or-doctype"})
+                                "expected-dashes-or-doctype"})
 
         while charStack:
             self.stream.unget(charStack.pop())
@@ -1172,12 +1171,12 @@ def commentStartState(self):
             self.currentToken["data"] += "\uFFFD"
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "incorrect-comment"})
+                                    "incorrect-comment"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-comment"})
+                                    "eof-in-comment"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
@@ -1195,12 +1194,12 @@ def commentStartDashState(self):
             self.currentToken["data"] += "-\uFFFD"
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "incorrect-comment"})
+                                    "incorrect-comment"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-comment"})
+                                    "eof-in-comment"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
@@ -1208,7 +1207,6 @@ def commentStartDashState(self):
             self.state = self.commentState
         return True
 
-
     def commentState(self):
         data = self.stream.char()
         if data == "-":
@@ -1238,7 +1236,7 @@ def commentEndDashState(self):
             self.state = self.commentState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-comment-end-dash"})
+                                    "eof-in-comment-end-dash"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
@@ -1258,21 +1256,21 @@ def commentEndState(self):
             self.state = self.commentState
         elif data == "!":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-bang-after-double-dash-in-comment"})
+                                    "unexpected-bang-after-double-dash-in-comment"})
             self.state = self.commentEndBangState
         elif data == "-":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-             "unexpected-dash-after-double-dash-in-comment"})
+                                    "unexpected-dash-after-double-dash-in-comment"})
             self.currentToken["data"] += data
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-comment-double-dash"})
+                                    "eof-in-comment-double-dash"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             # XXX
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-comment"})
+                                    "unexpected-char-in-comment"})
             self.currentToken["data"] += "--" + data
             self.state = self.commentState
         return True
@@ -1292,7 +1290,7 @@ def commentEndBangState(self):
             self.state = self.commentState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-comment-end-bang-state"})
+                                    "eof-in-comment-end-bang-state"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
@@ -1306,13 +1304,13 @@ def doctypeState(self):
             self.state = self.beforeDoctypeNameState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-doctype-name-but-got-eof"})
+                                    "expected-doctype-name-but-got-eof"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "need-space-after-doctype"})
+                                    "need-space-after-doctype"})
             self.stream.unget(data)
             self.state = self.beforeDoctypeNameState
         return True
@@ -1323,7 +1321,7 @@ def beforeDoctypeNameState(self):
             pass
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-doctype-name-but-got-right-bracket"})
+                                    "expected-doctype-name-but-got-right-bracket"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1334,7 +1332,7 @@ def beforeDoctypeNameState(self):
             self.state = self.doctypeNameState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-doctype-name-but-got-eof"})
+                                    "expected-doctype-name-but-got-eof"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1359,7 +1357,7 @@ def doctypeNameState(self):
             self.state = self.doctypeNameState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype-name"})
+                                    "eof-in-doctype-name"})
             self.currentToken["correct"] = False
             self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
             self.tokenQueue.append(self.currentToken)
@@ -1379,7 +1377,7 @@ def afterDoctypeNameState(self):
             self.currentToken["correct"] = False
             self.stream.unget(data)
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
@@ -1412,8 +1410,8 @@ def afterDoctypeNameState(self):
             # and needs to be ungetted
             self.stream.unget(data)
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                "expected-space-or-right-bracket-in-doctype", "datavars":
-                {"data": data}})
+                                    "expected-space-or-right-bracket-in-doctype", "datavars":
+                                    {"data": data}})
             self.currentToken["correct"] = False
             self.state = self.bogusDoctypeState
 
@@ -1425,12 +1423,12 @@ def afterDoctypePublicKeywordState(self):
             self.state = self.beforeDoctypePublicIdentifierState
         elif data in ("'", '"'):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.stream.unget(data)
             self.state = self.beforeDoctypePublicIdentifierState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1451,19 +1449,19 @@ def beforeDoctypePublicIdentifierState(self):
             self.state = self.doctypePublicIdentifierSingleQuotedState
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-end-of-doctype"})
+                                    "unexpected-end-of-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["correct"] = False
             self.state = self.bogusDoctypeState
         return True
@@ -1478,13 +1476,13 @@ def doctypePublicIdentifierDoubleQuotedState(self):
             self.currentToken["publicId"] += "\uFFFD"
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-end-of-doctype"})
+                                    "unexpected-end-of-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1502,13 +1500,13 @@ def doctypePublicIdentifierSingleQuotedState(self):
             self.currentToken["publicId"] += "\uFFFD"
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-end-of-doctype"})
+                                    "unexpected-end-of-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1525,23 +1523,23 @@ def afterDoctypePublicIdentifierState(self):
             self.state = self.dataState
         elif data == '"':
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["systemId"] = ""
             self.state = self.doctypeSystemIdentifierDoubleQuotedState
         elif data == "'":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["systemId"] = ""
             self.state = self.doctypeSystemIdentifierSingleQuotedState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["correct"] = False
             self.state = self.bogusDoctypeState
         return True
@@ -1561,13 +1559,13 @@ def betweenDoctypePublicAndSystemIdentifiersState(self):
             self.state = self.doctypeSystemIdentifierSingleQuotedState
         elif data == EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["correct"] = False
             self.state = self.bogusDoctypeState
         return True
@@ -1578,12 +1576,12 @@ def afterDoctypeSystemKeywordState(self):
             self.state = self.beforeDoctypeSystemIdentifierState
         elif data in ("'", '"'):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.stream.unget(data)
             self.state = self.beforeDoctypeSystemIdentifierState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1604,19 +1602,19 @@ def beforeDoctypeSystemIdentifierState(self):
             self.state = self.doctypeSystemIdentifierSingleQuotedState
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["correct"] = False
             self.state = self.bogusDoctypeState
         return True
@@ -1631,13 +1629,13 @@ def doctypeSystemIdentifierDoubleQuotedState(self):
             self.currentToken["systemId"] += "\uFFFD"
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-end-of-doctype"})
+                                    "unexpected-end-of-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1655,13 +1653,13 @@ def doctypeSystemIdentifierSingleQuotedState(self):
             self.currentToken["systemId"] += "\uFFFD"
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-end-of-doctype"})
+                                    "unexpected-end-of-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1678,13 +1676,13 @@ def afterDoctypeSystemIdentifierState(self):
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.state = self.bogusDoctypeState
         return True
 
@@ -1719,7 +1717,7 @@ def cdataSectionState(self):
                     data.append(char)
 
         data = "".join(data)
-        #Deal with null here rather than in the parser
+        # Deal with null here rather than in the parser
         nullCount = data.count("\u0000")
         if nullCount > 0:
             for i in range(nullCount):
diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py
index 122fdc2e..393309fd 100755
--- a/html5lib/treebuilders/__init__.py
+++ b/html5lib/treebuilders/__init__.py
@@ -34,7 +34,6 @@
 
 treeBuilderCache = {}
 
-import sys
 
 def getTreeBuilder(treeType, implementation=None, **kwargs):
     """Get a TreeBuilder class for various types of tree with built-in support
@@ -61,7 +60,7 @@ def getTreeBuilder(treeType, implementation=None, **kwargs):
         if treeType == "dom":
             from . import dom
             # XXX: Keep backwards compatibility by using minidom if no implementation is given
-            if implementation == None:
+            if implementation is None:
                 from xml.dom import minidom
                 implementation = minidom
             # XXX: NEVER cache here, caching is done in the dom submodule
@@ -74,7 +73,7 @@ def getTreeBuilder(treeType, implementation=None, **kwargs):
             treeBuilderCache[treeType] = etree_lxml.TreeBuilder
         elif treeType == "etree":
             # Come up with a sane default
-            if implementation == None:
+            if implementation is None:
                 try:
                     import xml.etree.cElementTree as ET
                 except ImportError:
@@ -90,5 +89,5 @@ def getTreeBuilder(treeType, implementation=None, **kwargs):
             # NEVER cache here, caching is done in the etree submodule
             return etree.getETreeModule(implementation, **kwargs).TreeBuilder
         else:
-            raise ValueError("""Unrecognised treebuilder "%s" """%treeType)
+            raise ValueError("""Unrecognised treebuilder "%s" """ % treeType)
     return treeBuilderCache.get(treeType)
diff --git a/html5lib/treebuilders/_base.py b/html5lib/treebuilders/_base.py
index 9da17791..90846e77 100755
--- a/html5lib/treebuilders/_base.py
+++ b/html5lib/treebuilders/_base.py
@@ -9,15 +9,15 @@
 Marker = None
 
 listElementsMap = {
-    None:(frozenset(scopingElements), False),
-    "button":(frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
-    "list":(frozenset(scopingElements | set([(namespaces["html"], "ol"),
-                                   (namespaces["html"], "ul")])), False),
-    "table":(frozenset([(namespaces["html"], "html"),
-                  (namespaces["html"], "table")]), False),
-    "select":(frozenset([(namespaces["html"], "optgroup"),
-                   (namespaces["html"], "option")]), True)
-    }
+    None: (frozenset(scopingElements), False),
+    "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
+    "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"),
+                                              (namespaces["html"], "ul")])), False),
+    "table": (frozenset([(namespaces["html"], "html"),
+                         (namespaces["html"], "table")]), False),
+    "select": (frozenset([(namespaces["html"], "optgroup"),
+                          (namespaces["html"], "option")]), True)
+}
 
 
 class Node(object):
@@ -40,13 +40,13 @@ def __init__(self, name):
         self._flags = []
 
     def __str__(self):
-        attributesStr =  " ".join(["%s=\"%s\""%(name, value)
-                                   for name, value in
-                                   self.attributes.items()])
+        attributesStr = " ".join(["%s=\"%s\"" % (name, value)
+                                  for name, value in
+                                  self.attributes.items()])
         if attributesStr:
-            return "<%s %s>"%(self.name,attributesStr)
+            return "<%s %s>" % (self.name, attributesStr)
         else:
-            return "<%s>"%(self.name)
+            return "<%s>" % (self.name)
 
     def __repr__(self):
         return "<%s>" % (self.name)
@@ -78,7 +78,7 @@ def reparentChildren(self, newParent):
         This is needed so that trees that don't store text as nodes move the
         text in the correct way
         """
-        #XXX - should this method be made more general?
+        # XXX - should this method be made more general?
         for child in self.childNodes:
             newParent.appendChild(child)
         self.childNodes = []
@@ -89,12 +89,12 @@ def cloneNode(self):
         """
         raise NotImplementedError
 
-
     def hasContent(self):
         """Return true if the node has children or text, false otherwise
         """
         raise NotImplementedError
 
+
 class ActiveFormattingElements(list):
     def append(self, node):
         equalCount = 0
@@ -118,6 +118,7 @@ def nodesEqual(self, node1, node2):
 
         return True
 
+
 class TreeBuilder(object):
     """Base treebuilder implementation
     documentClass - the class to use for the bottommost node of a document
@@ -126,19 +127,19 @@ class TreeBuilder(object):
     doctypeClass - the class to use for doctypes
     """
 
-    #Document class
+    # Document class
     documentClass = None
 
-    #The class to use for creating a node
+    # The class to use for creating a node
     elementClass = None
 
-    #The class to use for creating comments
+    # The class to use for creating comments
     commentClass = None
 
-    #The class to use for creating doctypes
+    # The class to use for creating doctypes
     doctypeClass = None
 
-    #Fragment class
+    # Fragment class
     fragmentClass = None
 
     def __init__(self, namespaceHTMLElements):
@@ -152,7 +153,7 @@ def reset(self):
         self.openElements = []
         self.activeFormattingElements = ActiveFormattingElements()
 
-        #XXX - rename these to headElement, formElement
+        # XXX - rename these to headElement, formElement
         self.headPointer = None
         self.formPointer = None
 
@@ -162,20 +163,20 @@ def reset(self):
 
     def elementInScope(self, target, variant=None):
 
-        #If we pass a node in we match that. if we pass a string
-        #match any node with that name
+        # If we pass a node in we match that. if we pass a string
+        # match any node with that name
         exactNode = hasattr(target, "nameTuple")
 
         listElements, invert = listElementsMap[variant]
 
         for node in reversed(self.openElements):
             if (node.name == target and not exactNode or
-                node == target and exactNode):
+                    node == target and exactNode):
                 return True
             elif (invert ^ (node.nameTuple in listElements)):
                 return False
 
-        assert False # We should never reach this point
+        assert False  # We should never reach this point
 
     def reconstructActiveFormattingElements(self):
         # Within this algorithm the order of steps described in the
@@ -195,7 +196,7 @@ def reconstructActiveFormattingElements(self):
         # Step 6
         while entry != Marker and entry not in self.openElements:
             if i == 0:
-                #This will be reset to 0 below
+                # This will be reset to 0 below
                 i = -1
                 break
             i -= 1
@@ -208,13 +209,13 @@ def reconstructActiveFormattingElements(self):
 
             # Step 8
             entry = self.activeFormattingElements[i]
-            clone = entry.cloneNode() #Mainly to get a new copy of the attributes
+            clone = entry.cloneNode()  # Mainly to get a new copy of the attributes
 
             # Step 9
-            element = self.insertElement({"type":"StartTag",
-                                          "name":clone.name,
-                                          "namespace":clone.namespace,
-                                          "data":clone.attributes})
+            element = self.insertElement({"type": "StartTag",
+                                          "name": clone.name,
+                                          "namespace": clone.namespace,
+                                          "data": clone.attributes})
 
             # Step 10
             self.activeFormattingElements[i] = element
@@ -284,7 +285,7 @@ def _setInsertFromTable(self, value):
 
     def insertElementNormal(self, token):
         name = token["name"]
-        assert isinstance(name, text_type), "Element %s not unicode"%name
+        assert isinstance(name, text_type), "Element %s not unicode" % name
         namespace = token.get("namespace", self.defaultNamespace)
         element = self.elementClass(name, namespace)
         element.attributes = token["data"]
@@ -298,8 +299,8 @@ def insertElementTable(self, token):
         if self.openElements[-1].name not in tableInsertModeElements:
             return self.insertElementNormal(token)
         else:
-            #We should be in the InTable mode. This means we want to do
-            #special magic element rearranging
+            # We should be in the InTable mode. This means we want to do
+            # special magic element rearranging
             parent, insertBefore = self.getTableMisnestedNodePosition()
             if insertBefore is None:
                 parent.appendChild(element)
@@ -329,7 +330,7 @@ def getTableMisnestedNodePosition(self):
         # The foster parent element is the one which comes before the most
         # recently opened table element
         # XXX - this is really inelegant
-        lastTable=None
+        lastTable = None
         fosterParent = None
         insertBefore = None
         for elm in self.openElements[::-1]:
@@ -353,7 +354,7 @@ def generateImpliedEndTags(self, exclude=None):
         name = self.openElements[-1].name
         # XXX td, th and tr are not actually needed
         if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
-            and name != exclude):
+                and name != exclude):
             self.openElements.pop()
             # XXX This is not entirely what the specification says. We should
             # investigate it more closely.
@@ -365,7 +366,7 @@ def getDocument(self):
 
     def getFragment(self):
         "Return the final fragment"
-        #assert self.innerHTML
+        # assert self.innerHTML
         fragment = self.fragmentClass()
         self.openElements[0].reparentChildren(fragment)
         return fragment
diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
index 7c6358b7..55f34f3f 100644
--- a/html5lib/treebuilders/dom.py
+++ b/html5lib/treebuilders/dom.py
@@ -2,30 +2,37 @@
 
 
 from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE
-import re
 import weakref
 
 from . import _base
-from html5lib import constants, ihatexml
+from html5lib import constants
 from html5lib.constants import namespaces
 from html5lib.utils import moduleFactoryFactory
 
+
 def getDomBuilder(DomImplementation):
     Dom = DomImplementation
+
     class AttrList(object):
         def __init__(self, element):
             self.element = element
+
         def __iter__(self):
             return list(self.element.attributes.items()).__iter__()
+
         def __setitem__(self, name, value):
             self.element.setAttribute(name, value)
+
         def __len__(self):
             return len(list(self.element.attributes.items()))
+
         def items(self):
             return [(item[0], item[1]) for item in
-                     list(self.element.attributes.items())]
+                    list(self.element.attributes.items())]
+
         def keys(self):
             return list(self.element.attributes.keys())
+
         def __getitem__(self, name):
             return self.element.getAttribute(name)
 
@@ -40,7 +47,7 @@ def __init__(self, element):
             _base.Node.__init__(self, element.nodeName)
             self.element = element
 
-        namespace = property(lambda self:hasattr(self.element, "namespaceURI")
+        namespace = property(lambda self: hasattr(self.element, "namespaceURI")
                              and self.element.namespaceURI or None)
 
         def appendChild(self, node):
@@ -95,7 +102,7 @@ def hasContent(self):
             return self.element.hasChildNodes()
 
         def getNameTuple(self):
-            if self.namespace == None:
+            if self.namespace is None:
                 return namespaces["html"], self.name
             else:
                 return self.namespace, self.name
@@ -104,7 +111,7 @@ def getNameTuple(self):
 
     class TreeBuilder(_base.TreeBuilder):
         def documentClass(self):
-            self.dom = Dom.getDOMImplementation().createDocument(None,None,None)
+            self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
             return weakref.proxy(self)
 
         def insertDoctype(self, token):
@@ -145,14 +152,14 @@ def getFragment(self):
             return _base.TreeBuilder.getFragment(self).element
 
         def insertText(self, data, parent=None):
-            data=data
+            data = data
             if parent != self:
                 _base.TreeBuilder.insertText(self, data, parent)
             else:
                 # HACK: allow text nodes as children of the document node
                 if hasattr(self.dom, '_child_node_types'):
                     if not Node.TEXT_NODE in self.dom._child_node_types:
-                        self.dom._child_node_types=list(self.dom._child_node_types)
+                        self.dom._child_node_types = list(self.dom._child_node_types)
                         self.dom._child_node_types.append(Node.TEXT_NODE)
                 self.dom.appendChild(self.dom.createTextNode(data))
 
@@ -161,34 +168,35 @@ def insertText(self, data, parent=None):
     def testSerializer(element):
         element.normalize()
         rv = []
+
         def serializeElement(element, indent=0):
             if element.nodeType == Node.DOCUMENT_TYPE_NODE:
                 if element.name:
                     if element.publicId or element.systemId:
                         publicId = element.publicId or ""
                         systemId = element.systemId or ""
-                        rv.append( """|%s<!DOCTYPE %s "%s" "%s">"""%(
-                                ' '*indent, element.name, publicId, systemId))
+                        rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
+                                  (' ' * indent, element.name, publicId, systemId))
                     else:
-                        rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.name))
+                        rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name))
                 else:
-                    rv.append("|%s<!DOCTYPE >"%(' '*indent,))
+                    rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
             elif element.nodeType == Node.DOCUMENT_NODE:
                 rv.append("#document")
             elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
                 rv.append("#document-fragment")
             elif element.nodeType == Node.COMMENT_NODE:
-                rv.append("|%s<!-- %s -->"%(' '*indent, element.nodeValue))
+                rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue))
             elif element.nodeType == Node.TEXT_NODE:
-                rv.append("|%s\"%s\"" %(' '*indent, element.nodeValue))
+                rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue))
             else:
                 if (hasattr(element, "namespaceURI") and
-                    element.namespaceURI != None):
-                    name = "%s %s"%(constants.prefixes[element.namespaceURI],
-                                    element.nodeName)
+                        element.namespaceURI is not None):
+                    name = "%s %s" % (constants.prefixes[element.namespaceURI],
+                                      element.nodeName)
                 else:
                     name = element.nodeName
-                rv.append("|%s<%s>"%(' '*indent, name))
+                rv.append("|%s<%s>" % (' ' * indent, name))
                 if element.hasAttributes():
                     attributes = []
                     for i in range(len(element.attributes)):
@@ -197,13 +205,13 @@ def serializeElement(element, indent=0):
                         value = attr.value
                         ns = attr.namespaceURI
                         if ns:
-                            name = "%s %s"%(constants.prefixes[ns], attr.localName)
+                            name = "%s %s" % (constants.prefixes[ns], attr.localName)
                         else:
                             name = attr.nodeName
                         attributes.append((name, value))
 
                     for name, value in sorted(attributes):
-                        rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
+                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
             indent += 2
             for child in element.childNodes:
                 serializeElement(child, indent)
@@ -211,63 +219,68 @@ def serializeElement(element, indent=0):
 
         return "\n".join(rv)
 
-    def dom2sax(node, handler, nsmap={'xml':XML_NAMESPACE}):
-      if node.nodeType == Node.ELEMENT_NODE:
-        if not nsmap:
-          handler.startElement(node.nodeName, node.attributes)
-          for child in node.childNodes: dom2sax(child, handler, nsmap)
-          handler.endElement(node.nodeName)
+    def dom2sax(node, handler, nsmap={'xml': XML_NAMESPACE}):
+        if node.nodeType == Node.ELEMENT_NODE:
+            if not nsmap:
+                handler.startElement(node.nodeName, node.attributes)
+                for child in node.childNodes:
+                    dom2sax(child, handler, nsmap)
+                handler.endElement(node.nodeName)
+            else:
+                attributes = dict(node.attributes.itemsNS())
+
+                # gather namespace declarations
+                prefixes = []
+                for attrname in list(node.attributes.keys()):
+                    attr = node.getAttributeNode(attrname)
+                    if (attr.namespaceURI == XMLNS_NAMESPACE or
+                       (attr.namespaceURI is None and attr.nodeName.startswith('xmlns'))):
+                        prefix = (attr.nodeName != 'xmlns' and attr.nodeName or None)
+                        handler.startPrefixMapping(prefix, attr.nodeValue)
+                        prefixes.append(prefix)
+                        nsmap = nsmap.copy()
+                        nsmap[prefix] = attr.nodeValue
+                        del attributes[(attr.namespaceURI, attr.nodeName)]
+
+                # apply namespace declarations
+                for attrname in list(node.attributes.keys()):
+                    attr = node.getAttributeNode(attrname)
+                    if attr.namespaceURI is None and ':' in attr.nodeName:
+                        prefix = attr.nodeName.split(':')[0]
+                        if prefix in nsmap:
+                            del attributes[(attr.namespaceURI, attr.nodeName)]
+                            attributes[(nsmap[prefix], attr.nodeName)] = attr.nodeValue
+
+                # SAX events
+                ns = node.namespaceURI or nsmap.get(None, None)
+                handler.startElementNS((ns, node.nodeName), node.nodeName, attributes)
+                for child in node.childNodes:
+                    dom2sax(child, handler, nsmap)
+                handler.endElementNS((ns, node.nodeName), node.nodeName)
+                for prefix in prefixes:
+                    handler.endPrefixMapping(prefix)
+
+        elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]:
+            handler.characters(node.nodeValue)
+
+        elif node.nodeType == Node.DOCUMENT_NODE:
+            handler.startDocument()
+            for child in node.childNodes:
+                dom2sax(child, handler, nsmap)
+            handler.endDocument()
+
+        elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
+            for child in node.childNodes:
+                dom2sax(child, handler, nsmap)
+
         else:
-          attributes = dict(node.attributes.itemsNS())
-
-          # gather namespace declarations
-          prefixes = []
-          for attrname in list(node.attributes.keys()):
-            attr = node.getAttributeNode(attrname)
-            if (attr.namespaceURI == XMLNS_NAMESPACE or
-               (attr.namespaceURI == None and attr.nodeName.startswith('xmlns'))):
-              prefix = (attr.nodeName != 'xmlns' and attr.nodeName or None)
-              handler.startPrefixMapping(prefix, attr.nodeValue)
-              prefixes.append(prefix)
-              nsmap = nsmap.copy()
-              nsmap[prefix] = attr.nodeValue
-              del attributes[(attr.namespaceURI, attr.nodeName)]
-
-          # apply namespace declarations
-          for attrname in list(node.attributes.keys()):
-            attr = node.getAttributeNode(attrname)
-            if attr.namespaceURI == None and ':' in attr.nodeName:
-              prefix = attr.nodeName.split(':')[0]
-              if prefix in nsmap:
-                del attributes[(attr.namespaceURI, attr.nodeName)]
-                attributes[(nsmap[prefix],attr.nodeName)]=attr.nodeValue
-
-          # SAX events
-          ns = node.namespaceURI or nsmap.get(None,None)
-          handler.startElementNS((ns,node.nodeName), node.nodeName, attributes)
-          for child in node.childNodes: dom2sax(child, handler, nsmap)
-          handler.endElementNS((ns, node.nodeName), node.nodeName)
-          for prefix in prefixes: handler.endPrefixMapping(prefix)
-
-      elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]:
-        handler.characters(node.nodeValue)
-
-      elif node.nodeType == Node.DOCUMENT_NODE:
-        handler.startDocument()
-        for child in node.childNodes: dom2sax(child, handler, nsmap)
-        handler.endDocument()
-
-      elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
-        for child in node.childNodes: dom2sax(child, handler, nsmap)
-
-      else:
-        # ATTRIBUTE_NODE
-        # ENTITY_NODE
-        # PROCESSING_INSTRUCTION_NODE
-        # COMMENT_NODE
-        # DOCUMENT_TYPE_NODE
-        # NOTATION_NODE
-        pass
+            # ATTRIBUTE_NODE
+            # ENTITY_NODE
+            # PROCESSING_INSTRUCTION_NODE
+            # COMMENT_NODE
+            # DOCUMENT_TYPE_NODE
+            # NOTATION_NODE
+            pass
 
     return locals()
 
@@ -279,4 +292,4 @@ def dom2sax(node, handler, nsmap={'xml':XML_NAMESPACE}):
 # Keep backwards compatibility with things that directly load
 # classes/functions from this module
 for key, value in list(getDomModule(minidom).__dict__.items()):
-	globals()[key] = value
+    globals()[key] = value
diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
index 48c3ce7c..018b6606 100755
--- a/html5lib/treebuilders/etree.py
+++ b/html5lib/treebuilders/etree.py
@@ -11,9 +11,11 @@
 
 tag_regexp = re.compile("{([^}]*)}(.*)")
 
+
 def getETreeBuilder(ElementTreeImplementation, fullTree=False):
     ElementTree = ElementTreeImplementation
     ElementTreeCommentType = ElementTree.Comment("asd").tag
+
     class Element(_base.Node):
         def __init__(self, name, namespace=None):
             self._name = name
@@ -32,7 +34,7 @@ def _getETreeTag(self, name, namespace):
             if namespace is None:
                 etree_tag = name
             else:
-                etree_tag = "{%s}%s"%(namespace, name)
+                etree_tag = "{%s}%s" % (namespace, name)
             return etree_tag
 
         def _setName(self, name):
@@ -57,13 +59,13 @@ def _getAttributes(self):
             return self._element.attrib
 
         def _setAttributes(self, attributes):
-            #Delete existing attributes first
-            #XXX - there may be a better way to do this...
+            # Delete existing attributes first
+            # XXX - there may be a better way to do this...
             for key in list(self._element.attrib.keys()):
                 del self._element.attrib[key]
             for key, value in attributes.items():
                 if isinstance(key, tuple):
-                    name = "{%s}%s"%(key[2], key[1])
+                    name = "{%s}%s" % (key[2], key[1])
                 else:
                     name = key
                 self._element.set(name, value)
@@ -72,6 +74,7 @@ def _setAttributes(self, attributes):
 
         def _getChildNodes(self):
             return self._childNodes
+
         def _setChildNodes(self, value):
             del self._element[:]
             self._childNodes = []
@@ -96,7 +99,7 @@ def insertBefore(self, node, refNode):
 
         def removeChild(self, node):
             self._element.remove(node._element)
-            node.parent=None
+            node.parent = None
 
         def insertText(self, data, insertBefore=None):
             if not(len(self._element)):
@@ -104,18 +107,18 @@ def insertText(self, data, insertBefore=None):
                     self._element.text = ""
                 self._element.text += data
             elif insertBefore is None:
-                #Insert the text as the tail of the last child element
+                # Insert the text as the tail of the last child element
                 if not self._element[-1].tail:
                     self._element[-1].tail = ""
                 self._element[-1].tail += data
             else:
-                #Insert the text before the specified node
+                # Insert the text before the specified node
                 children = list(self._element)
                 index = children.index(insertBefore._element)
                 if index > 0:
-                    if not self._element[index-1].tail:
-                        self._element[index-1].tail = ""
-                    self._element[index-1].tail += data
+                    if not self._element[index - 1].tail:
+                        self._element[index - 1].tail = ""
+                    self._element[index - 1].tail += data
                 else:
                     if not self._element.text:
                         self._element.text = ""
@@ -140,8 +143,8 @@ def reparentChildren(self, newParent):
 
     class Comment(Element):
         def __init__(self, data):
-            #Use the superclass constructor to set all properties on the
-            #wrapper element
+            # Use the superclass constructor to set all properties on the
+            # wrapper element
             self._element = ElementTree.Comment(data)
             self.parent = None
             self._childNodes = []
@@ -190,7 +193,7 @@ def __init__(self):
 
     def testSerializer(element):
         rv = []
-        finalText = None
+
         def serializeElement(element, indent=0):
             if not(hasattr(element, "tag")):
                 element = element.getroot()
@@ -198,20 +201,23 @@ def serializeElement(element, indent=0):
                 if element.get("publicId") or element.get("systemId"):
                     publicId = element.get("publicId") or ""
                     systemId = element.get("systemId") or ""
-                    rv.append( """<!DOCTYPE %s "%s" "%s">"""%(
-                            element.text, publicId, systemId))
+                    rv.append("""<!DOCTYPE %s "%s" "%s">""" %
+                              (element.text, publicId, systemId))
                 else:
-                    rv.append("<!DOCTYPE %s>"%(element.text,))
+                    rv.append("<!DOCTYPE %s>" % (element.text,))
             elif element.tag == "DOCUMENT_ROOT":
                 rv.append("#document")
-                if element.text:
-                    rv.append("|%s\"%s\""%(' '*(indent+2), element.text))
-                if element.tail:
-                    finalText = element.tail
+                if element.text is not None:
+                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
+                if element.tail is not None:
+                    raise TypeError("Document node cannot have tail")
+                if hasattr(element, "attrib") and len(element.attrib):
+                    raise TypeError("Document node cannot have attributes")
             elif element.tag == ElementTreeCommentType:
-                rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
+                rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
             else:
-                assert isinstance(element.tag, text_type), "Expected unicode, got %s, %s"%(type(element.tag), element.tag)
+                assert isinstance(element.tag, text_type), \
+                    "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
                 nsmatch = tag_regexp.match(element.tag)
 
                 if nsmatch is None:
@@ -219,8 +225,8 @@ def serializeElement(element, indent=0):
                 else:
                     ns, name = nsmatch.groups()
                     prefix = constants.prefixes[ns]
-                    name = "%s %s"%(prefix, name)
-                rv.append("|%s<%s>"%(' '*indent, name))
+                    name = "%s %s" % (prefix, name)
+                rv.append("|%s<%s>" % (' ' * indent, name))
 
                 if hasattr(element, "attrib"):
                     attributes = []
@@ -229,80 +235,76 @@ def serializeElement(element, indent=0):
                         if nsmatch is not None:
                             ns, name = nsmatch.groups()
                             prefix = constants.prefixes[ns]
-                            attr_string = "%s %s"%(prefix, name)
+                            attr_string = "%s %s" % (prefix, name)
                         else:
                             attr_string = name
                         attributes.append((attr_string, value))
 
                     for name, value in sorted(attributes):
-                        rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
+                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
                 if element.text:
-                    rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
+                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
             indent += 2
             for child in element:
                 serializeElement(child, indent)
             if element.tail:
-                rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail))
+                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
         serializeElement(element, 0)
 
-        if finalText is not None:
-            rv.append("|%s\"%s\""%(' '*2, finalText))
-
         return "\n".join(rv)
 
     def tostring(element):
         """Serialize an element and its child nodes to a string"""
         rv = []
-        finalText = None
         filter = ihatexml.InfosetFilter()
+
         def serializeElement(element):
-            if type(element) == type(ElementTree.ElementTree):
+            if isinstance(element, ElementTree.ElementTree):
                 element = element.getroot()
 
             if element.tag == "<!DOCTYPE>":
                 if element.get("publicId") or element.get("systemId"):
                     publicId = element.get("publicId") or ""
                     systemId = element.get("systemId") or ""
-                    rv.append( """<!DOCTYPE %s PUBLIC "%s" "%s">"""%(
-                            element.text, publicId, systemId))
+                    rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
+                              (element.text, publicId, systemId))
                 else:
-                    rv.append("<!DOCTYPE %s>"%(element.text,))
+                    rv.append("<!DOCTYPE %s>" % (element.text,))
             elif element.tag == "DOCUMENT_ROOT":
-                if element.text:
+                if element.text is not None:
                     rv.append(element.text)
-                if element.tail:
-                    finalText = element.tail
+                if element.tail is not None:
+                    raise TypeError("Document node cannot have tail")
+                if hasattr(element, "attrib") and len(element.attrib):
+                    raise TypeError("Document node cannot have attributes")
 
                 for child in element:
                     serializeElement(child)
 
             elif element.tag == ElementTreeCommentType:
-                rv.append("<!--%s-->"%(element.text,))
+                rv.append("<!--%s-->" % (element.text,))
             else:
-                #This is assumed to be an ordinary element
+                # This is assumed to be an ordinary element
                 if not element.attrib:
-                    rv.append("<%s>"%(filter.fromXmlName(element.tag),))
+                    rv.append("<%s>" % (filter.fromXmlName(element.tag),))
                 else:
-                    attr = " ".join(["%s=\"%s\""%(
-                                filter.fromXmlName(name), value)
-                                     for name, value in element.attrib.items()])
-                    rv.append("<%s %s>"%(element.tag, attr))
+                    attr = " ".join(["%s=\"%s\"" % (
+                        filter.fromXmlName(name), value)
+                        for name, value in element.attrib.items()])
+                    rv.append("<%s %s>" % (element.tag, attr))
                 if element.text:
                     rv.append(element.text)
 
                 for child in element:
                     serializeElement(child)
 
-                rv.append("</%s>"%(element.tag,))
+                rv.append("</%s>" % (element.tag,))
 
             if element.tail:
                 rv.append(element.tail)
 
         serializeElement(element)
 
-        if finalText is not None:
-            rv.append("%s\""%(' '*2, finalText))
-
         return "".join(rv)
 
     class TreeBuilder(_base.TreeBuilder):
@@ -321,7 +323,7 @@ def getDocument(self):
             else:
                 if self.defaultNamespace is not None:
                     return self.document._element.find(
-                        "{%s}html"%self.defaultNamespace)
+                        "{%s}html" % self.defaultNamespace)
                 else:
                     return self.document._element.find("html")
 
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index bce09747..6879797e 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -29,6 +29,8 @@
 fullTree = True
 tag_regexp = re.compile("{([^}]*)}(.*)")
 
+comment_type = etree.Comment("asd").tag
+
 
 class DocumentType(object):
     def __init__(self, name, publicId, systemId):
@@ -36,6 +38,7 @@ def __init__(self, name, publicId, systemId):
         self.publicId = publicId
         self.systemId = systemId
 
+
 class Document(object):
     def __init__(self):
         self._elementTree = None
@@ -49,44 +52,46 @@ def _getChildNodes(self):
 
     childNodes = property(_getChildNodes)
 
+
 def testSerializer(element):
     rv = []
     finalText = None
     infosetFilter = ihatexml.InfosetFilter()
+
     def serializeElement(element, indent=0):
         if not hasattr(element, "tag"):
-            if  hasattr(element, "getroot"):
-                #Full tree case
+            if hasattr(element, "getroot"):
+                # Full tree case
                 rv.append("#document")
                 if element.docinfo.internalDTD:
                     if not (element.docinfo.public_id or
                             element.docinfo.system_url):
-                        dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name
+                        dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                     else:
-                        dtd_str = """<!DOCTYPE %s "%s" "%s">"""%(
+                        dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
                             element.docinfo.root_name,
                             element.docinfo.public_id,
                             element.docinfo.system_url)
-                    rv.append("|%s%s"%(' '*(indent+2), dtd_str))
+                    rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
                 next_element = element.getroot()
                 while next_element.getprevious() is not None:
                     next_element = next_element.getprevious()
                 while next_element is not None:
-                    serializeElement(next_element, indent+2)
+                    serializeElement(next_element, indent + 2)
                     next_element = next_element.getnext()
             elif isinstance(element, str) or isinstance(element, bytes):
-                #Text in a fragment
+                # Text in a fragment
                 assert isinstance(element, str) or sys.version_info.major == 2
-                rv.append("|%s\"%s\""%(' '*indent, element))
+                rv.append("|%s\"%s\"" % (' ' * indent, element))
             else:
-                #Fragment case
+                # Fragment case
                 rv.append("#document-fragment")
                 for next_element in element:
-                    serializeElement(next_element, indent+2)
-        elif type(element.tag) == type(etree.Comment):
-            rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
+                    serializeElement(next_element, indent + 2)
+        elif element.tag == comment_type:
+            rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
             if hasattr(element, "tail") and element.tail:
-                rv.append("|%s\"%s\"" %(' '*indent, element.tail))
+                rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
         else:
             assert isinstance(element, etree._Element)
             nsmatch = etree_builders.tag_regexp.match(element.tag)
@@ -94,11 +99,11 @@ def serializeElement(element, indent=0):
                 ns = nsmatch.group(1)
                 tag = nsmatch.group(2)
                 prefix = constants.prefixes[ns]
-                rv.append("|%s<%s %s>"%(' '*indent, prefix,
-                                        infosetFilter.fromXmlName(tag)))
+                rv.append("|%s<%s %s>" % (' ' * indent, prefix,
+                                          infosetFilter.fromXmlName(tag)))
             else:
-                rv.append("|%s<%s>"%(' '*indent,
-                                     infosetFilter.fromXmlName(element.tag)))
+                rv.append("|%s<%s>" % (' ' * indent,
+                                       infosetFilter.fromXmlName(element.tag)))
 
             if hasattr(element, "attrib"):
                 attributes = []
@@ -108,60 +113,62 @@ def serializeElement(element, indent=0):
                         ns, name = nsmatch.groups()
                         name = infosetFilter.fromXmlName(name)
                         prefix = constants.prefixes[ns]
-                        attr_string = "%s %s"%(prefix, name)
+                        attr_string = "%s %s" % (prefix, name)
                     else:
                         attr_string = infosetFilter.fromXmlName(name)
                     attributes.append((attr_string, value))
 
                 for name, value in sorted(attributes):
-                    rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
+                    rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
 
             if element.text:
-                rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
+                rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
             indent += 2
             for child in element.getchildren():
                 serializeElement(child, indent)
             if hasattr(element, "tail") and element.tail:
-                rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail))
+                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
     serializeElement(element, 0)
 
     if finalText is not None:
-        rv.append("|%s\"%s\""%(' '*2, finalText))
+        rv.append("|%s\"%s\"" % (' ' * 2, finalText))
 
     return "\n".join(rv)
 
+
 def tostring(element):
     """Serialize an element and its child nodes to a string"""
     rv = []
     finalText = None
+
     def serializeElement(element):
         if not hasattr(element, "tag"):
             if element.docinfo.internalDTD:
                 if element.docinfo.doctype:
                     dtd_str = element.docinfo.doctype
                 else:
-                    dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name
+                    dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                 rv.append(dtd_str)
             serializeElement(element.getroot())
 
-        elif type(element.tag) == type(etree.Comment):
-            rv.append("<!--%s-->"%(element.text,))
+        elif element.tag == comment_type:
+            rv.append("<!--%s-->" % (element.text,))
 
         else:
-            #This is assumed to be an ordinary element
+            # This is assumed to be an ordinary element
             if not element.attrib:
-                rv.append("<%s>"%(element.tag,))
+                rv.append("<%s>" % (element.tag,))
             else:
-                attr = " ".join(["%s=\"%s\""%(name, value)
+                attr = " ".join(["%s=\"%s\"" % (name, value)
                                  for name, value in element.attrib.items()])
-                rv.append("<%s %s>"%(element.tag, attr))
+                rv.append("<%s %s>" % (element.tag, attr))
             if element.text:
                 rv.append(element.text)
 
             for child in element.getchildren():
                 serializeElement(child)
 
-            rv.append("</%s>"%(element.tag,))
+            rv.append("</%s>" % (element.tag,))
 
         if hasattr(element, "tail") and element.tail:
             rv.append(element.tail)
@@ -169,7 +176,7 @@ def serializeElement(element):
     serializeElement(element)
 
     if finalText is not None:
-        rv.append("%s\""%(' '*2, finalText))
+        rv.append(finalText)  # emit trailing text verbatim, as done for element.tail
 
     return "".join(rv)
 
@@ -181,7 +188,7 @@ class TreeBuilder(_base.TreeBuilder):
     commentClass = None
     fragmentClass = Document
 
-    def __init__(self, namespaceHTMLElements, fullTree = False):
+    def __init__(self, namespaceHTMLElements, fullTree=False):
         builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
         infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
         self.namespaceHTMLElements = namespaceHTMLElements
@@ -192,7 +199,7 @@ def __init__(self, element, value={}):
                 dict.__init__(self, value)
                 for key, value in self.items():
                     if isinstance(key, tuple):
-                        name = "{%s}%s"%(key[2], infosetFilter.coerceAttribute(key[1]))
+                        name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
                     else:
                         name = infosetFilter.coerceAttribute(key)
                     self._element._element.attrib[name] = value
@@ -200,7 +207,7 @@ def __init__(self, element, value={}):
             def __setitem__(self, key, value):
                 dict.__setitem__(self, key, value)
                 if isinstance(key, tuple):
-                    name = "{%s}%s"%(key[2], infosetFilter.coerceAttribute(key[1]))
+                    name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
                 else:
                     name = infosetFilter.coerceAttribute(key)
                 self._element._element.attrib[name] = value
@@ -236,7 +243,6 @@ def insertText(self, data, insertBefore=None):
             def appendChild(self, child):
                 builder.Element.appendChild(self, child)
 
-
         class Comment(builder.Comment):
             def __init__(self, data):
                 data = infosetFilter.coerceComment(data)
@@ -253,7 +259,7 @@ def _getData(self):
 
         self.elementClass = Element
         self.commentClass = builder.Comment
-        #self.fragmentClass = builder.DocumentFragment
+        # self.fragmentClass = builder.DocumentFragment
         _base.TreeBuilder.__init__(self, namespaceHTMLElements)
 
     def reset(self):
@@ -297,23 +303,23 @@ def insertCommentInitial(self, data, parent=None):
 
     def insertCommentMain(self, data, parent=None):
         if (parent == self.document and
-            type(self.document._elementTree.getroot()[-1].tag) == type(etree.Comment)):
+                self.document._elementTree.getroot()[-1].tag == comment_type):
                 warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
         super(TreeBuilder, self).insertComment(data, parent)
 
     def insertRoot(self, token):
         """Create the document root"""
-        #Because of the way libxml2 works, it doesn't seem to be possible to
-        #alter information like the doctype after the tree has been parsed.
-        #Therefore we need to use the built-in parser to create our iniial
-        #tree, after which we can add elements like normal
+        # Because of the way libxml2 works, it doesn't seem to be possible to
+        # alter information like the doctype after the tree has been parsed.
+        # Therefore we need to use the built-in parser to create our initial
+        # tree, after which we can add elements like normal
         docStr = ""
         if self.doctype and self.doctype.name and not self.doctype.name.startswith('"'):
-            docStr += "<!DOCTYPE %s"%self.doctype.name
+            docStr += "<!DOCTYPE %s" % self.doctype.name
             if (self.doctype.publicId is not None or
-                self.doctype.systemId is not None):
-                docStr += ' PUBLIC "%s" "%s"'%(self.doctype.publicId or "",
-                                               self.doctype.systemId or "")
+                    self.doctype.systemId is not None):
+                docStr += ' PUBLIC "%s" "%s"' % (self.doctype.publicId or "",
+                                                 self.doctype.systemId or "")
             docStr += ">"
             if self.doctype.name != token["name"]:
                 warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
@@ -325,11 +331,11 @@ def insertRoot(self, token):
             print(docStr)
             raise
 
-        #Append the initial comments:
+        # Append the initial comments:
         for comment_token in self.initial_comments:
             root.addprevious(etree.Comment(comment_token["data"]))
 
-        #Create the root document and add the ElementTree to it
+        # Create the root document and add the ElementTree to it
         self.document = self.documentClass()
         self.document._elementTree = root.getroottree()
 
@@ -339,14 +345,14 @@ def insertRoot(self, token):
         if namespace is None:
             etree_tag = name
         else:
-            etree_tag = "{%s}%s"%(namespace, name)
+            etree_tag = "{%s}%s" % (namespace, name)
         root.tag = etree_tag
 
-        #Add the root element to the internal child/open data structures
+        # Add the root element to the internal child/open data structures
         root_element = self.elementClass(name, namespace)
         root_element._element = root
         self.document._childNodes.append(root_element)
         self.openElements.append(root_element)
 
-        #Reset to the default insert comment function
+        # Reset to the default insert comment function
         self.insertComment = self.insertCommentMain
diff --git a/html5lib/treebuilders/simpletree.py b/html5lib/treebuilders/simpletree.py
index f1bc871a..9558f3bd 100755
--- a/html5lib/treebuilders/simpletree.py
+++ b/html5lib/treebuilders/simpletree.py
@@ -6,8 +6,11 @@
 from xml.sax.saxutils import escape
 
 # Really crappy basic implementation of a DOM-core like thing
+
+
 class Node(_base.Node):
     type = -1
+
     def __init__(self, name):
         self.name = name
         self.parent = None
@@ -28,7 +31,7 @@ def toxml(self):
         raise NotImplementedError
 
     def printTree(self, indent=0):
-        tree = '\n|%s%s' % (' '* indent, text_type(self))
+        tree = '\n|%s%s' % (' ' * indent, text_type(self))
         for child in self.childNodes:
             tree += child.printTree(indent + 2)
         return tree
@@ -36,14 +39,14 @@ def printTree(self, indent=0):
     def appendChild(self, node):
         assert isinstance(node, Node)
         if (isinstance(node, TextNode) and self.childNodes and
-          isinstance(self.childNodes[-1], TextNode)):
+           isinstance(self.childNodes[-1], TextNode)):
             self.childNodes[-1].value += node.value
         else:
             self.childNodes.append(node)
         node.parent = self
 
     def insertText(self, data, insertBefore=None):
-        assert isinstance(data, text_type), "data %s is of type %s expected unicode"%(repr(data), type(data))
+        assert isinstance(data, text_type), "data %s is of type %s expected unicode" % (repr(data), type(data))
         if insertBefore is None:
             self.appendChild(TextNode(data))
         else:
@@ -52,7 +55,7 @@ def insertText(self, data, insertBefore=None):
     def insertBefore(self, node, refNode):
         index = self.childNodes.index(refNode)
         if (isinstance(node, TextNode) and index > 0 and
-          isinstance(self.childNodes[index - 1], TextNode)):
+           isinstance(self.childNodes[index - 1], TextNode)):
             self.childNodes[index - 1].value += node.value
         else:
             self.childNodes.insert(index, node)
@@ -74,15 +77,17 @@ def hasContent(self):
         return bool(self.childNodes)
 
     def getNameTuple(self):
-        if self.namespace == None:
+        if self.namespace is None:
             return namespaces["html"], self.name
         else:
             return self.namespace, self.name
 
     nameTuple = property(getNameTuple)
 
+
 class Document(Node):
     type = 1
+
     def __init__(self):
         Node.__init__(self, None)
 
@@ -113,16 +118,20 @@ def printTree(self):
     def cloneNode(self):
         return Document()
 
+
 class DocumentFragment(Document):
     type = 2
+
     def __str__(self):
         return "#document-fragment"
 
     def cloneNode(self):
         return DocumentFragment()
 
+
 class DocumentType(Node):
     type = 3
+
     def __init__(self, name, publicId, systemId):
         Node.__init__(self, name)
         self.publicId = publicId
@@ -132,13 +141,12 @@ def __str__(self):
         if self.publicId or self.systemId:
             publicId = self.publicId or ""
             systemId = self.systemId or ""
-            return """<!DOCTYPE %s "%s" "%s">"""%(
+            return """<!DOCTYPE %s "%s" "%s">""" % (
                 self.name, publicId, systemId)
 
         else:
             return "<!DOCTYPE %s>" % self.name
 
-
     toxml = __str__
 
     def hilite(self):
@@ -147,8 +155,10 @@ def hilite(self):
     def cloneNode(self):
         return DocumentType(self.name, self.publicId, self.systemId)
 
+
 class TextNode(Node):
     type = 4
+
     def __init__(self, value):
         Node.__init__(self, None)
         self.value = value
@@ -165,24 +175,26 @@ def cloneNode(self):
         assert isinstance(self.value, str)
         return TextNode(self.value)
 
+
 class Element(Node):
     type = 5
+
     def __init__(self, name, namespace=None):
         Node.__init__(self, name)
         self.namespace = namespace
         self.attributes = {}
 
     def __str__(self):
-        if self.namespace == None:
+        if self.namespace is None:
             return "<%s>" % self.name
         else:
-            return "<%s %s>"%(prefixes[self.namespace], self.name)
+            return "<%s %s>" % (prefixes[self.namespace], self.name)
 
     def toxml(self):
         result = '<' + self.name
         if self.attributes:
-            for name,value in self.attributes.items():
-                result += ' %s="%s"' % (name, escape(value,{'"':'&quot;'}))
+            for name, value in self.attributes.items():
+                result += ' %s="%s"' % (name, escape(value, {'"': '&quot;'}))
         if self.childNodes:
             result += '>'
             for child in self.childNodes:
@@ -196,7 +208,7 @@ def hilite(self):
         result = '&lt;<code class="markup element-name">%s</code>' % self.name
         if self.attributes:
             for name, value in self.attributes.items():
-                result += ' <code class="markup attribute-name">%s</code>=<code class="markup attribute-value">"%s"</code>' % (name, escape(value, {'"':'&quot;'}))
+                result += ' <code class="markup attribute-name">%s</code>=<code class="markup attribute-value">"%s"</code>' % (name, escape(value, {'"': '&quot;'}))
         if self.childNodes:
             result += ">"
             for child in self.childNodes:
@@ -206,12 +218,12 @@ def hilite(self):
         return result + '&lt;/<code class="markup element-name">%s</code>>' % self.name
 
     def printTree(self, indent):
-        tree = '\n|%s%s' % (' '*indent, text_type(self))
+        tree = '\n|%s%s' % (' ' * indent, text_type(self))
         indent += 2
         if self.attributes:
             for name, value in sorted(self.attributes.items()):
                 if isinstance(name, tuple):
-                    name = "%s %s"%(name[0], name[1])
+                    name = "%s %s" % (name[0], name[1])
                 tree += '\n|%s%s="%s"' % (' ' * indent, name, value)
         for child in self.childNodes:
             tree += child.printTree(indent)
@@ -223,8 +235,10 @@ def cloneNode(self):
             newNode.attributes[attr] = value
         return newNode
 
+
 class CommentNode(Node):
     type = 6
+
     def __init__(self, data):
         Node.__init__(self, None)
         self.data = data
@@ -241,6 +255,7 @@ def hilite(self):
     def cloneNode(self):
         return CommentNode(self.data)
 
+
 class TreeBuilder(_base.TreeBuilder):
     documentClass = Document
     doctypeClass = DocumentType
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
index 3d7de83f..bec625ce 100644
--- a/html5lib/treewalkers/__init__.py
+++ b/html5lib/treewalkers/__init__.py
@@ -14,6 +14,7 @@
 
 treeWalkerCache = {}
 
+
 def getTreeWalker(treeType, implementation=None, **kwargs):
     """Get a TreeWalker class for various types of tree with built-in support
 
diff --git a/html5lib/treewalkers/_base.py b/html5lib/treewalkers/_base.py
index 43c3f8de..5b9c1e26 100644
--- a/html5lib/treewalkers/_base.py
+++ b/html5lib/treewalkers/_base.py
@@ -7,6 +7,7 @@
 from html5lib.constants import voidElements, spaceCharacters
 spaceCharacters = "".join(spaceCharacters)
 
+
 class TreeWalker(object):
     def __init__(self, tree):
         self.tree = tree
@@ -21,12 +22,12 @@ def emptyTag(self, namespace, name, attrs, hasChildren=False):
         assert namespace is None or isinstance(namespace, text_type), type(namespace)
         assert isinstance(name, text_type), type(name)
         assert all((namespace is None or isinstance(namespace, text_type)) and
-                                isinstance(name, text_type) and
-                                isinstance(value, text_type)
-                                for (namespace, name), value in attrs.items())
+                   isinstance(name, text_type) and
+                   isinstance(value, text_type)
+                   for (namespace, name), value in attrs.items())
 
         yield {"type": "EmptyTag", "name": name,
-               "namespace":namespace,
+               "namespace": namespace,
                "data": attrs}
         if hasChildren:
             yield self.error(_("Void element has children"))
@@ -35,13 +36,13 @@ def startTag(self, namespace, name, attrs):
         assert namespace is None or isinstance(namespace, text_type), type(namespace)
         assert isinstance(name, text_type), type(name)
         assert all((namespace is None or isinstance(namespace, text_type)) and
-                                isinstance(name, text_type) and
-                                isinstance(value, text_type)
-                                for (namespace, name), value in attrs.items())
+                   isinstance(name, text_type) and
+                   isinstance(value, text_type)
+                   for (namespace, name), value in attrs.items())
 
         return {"type": "StartTag",
                 "name": name,
-                "namespace":namespace,
+                "namespace": namespace,
                 "data": attrs}
 
     def endTag(self, namespace, name):
@@ -50,7 +51,7 @@ def endTag(self, namespace, name):
 
         return {"type": "EndTag",
                 "name": name,
-                "namespace":namespace,
+                "namespace": namespace,
                 "data": {}}
 
     def text(self, data):
@@ -58,7 +59,7 @@ def text(self, data):
 
         data = data
         middle = data.lstrip(spaceCharacters)
-        left = data[:len(data)-len(middle)]
+        left = data[:len(data) - len(middle)]
         if left:
             yield {"type": "SpaceCharacters", "data": left}
         data = middle
@@ -93,9 +94,10 @@ def entity(self, name):
     def unknown(self, nodeType):
         return self.error(_("Unknown node type: ") + nodeType)
 
+
 class RecursiveTreeWalker(TreeWalker):
     def walkChildren(self, node):
-        raise NodeImplementedError
+        raise NotImplementedError
 
     def element(self, node, namespace, name, attrs, hasChildren):
         if name in voidElements:
@@ -118,6 +120,7 @@ def element(self, node, namespace, name, attrs, hasChildren):
 ENTITY = Node.ENTITY_NODE
 UNKNOWN = "<#UNKNOWN#>"
 
+
 class NonRecursiveTreeWalker(TreeWalker):
     def getNodeDetails(self, node):
         raise NotImplementedError
@@ -137,7 +140,6 @@ def __iter__(self):
             details = self.getNodeDetails(currentNode)
             type, details = details[0], details[1:]
             hasChildren = False
-            endTag = None
 
             if type == DOCTYPE:
                 yield self.doctype(*details)
@@ -154,7 +156,6 @@ def __iter__(self):
                         yield token
                     hasChildren = False
                 else:
-                    endTag = name
                     yield self.startTag(namespace, name, attributes)
 
             elif type == COMMENT:
diff --git a/html5lib/treewalkers/dom.py b/html5lib/treewalkers/dom.py
index ddf4dc59..a01287a9 100644
--- a/html5lib/treewalkers/dom.py
+++ b/html5lib/treewalkers/dom.py
@@ -6,7 +6,7 @@
 _ = gettext.gettext
 
 from . import _base
-from html5lib.constants import voidElements
+
 
 class TreeWalker(_base.NonRecursiveTreeWalker):
     def getNodeDetails(self, node):
@@ -21,9 +21,9 @@ def getNodeDetails(self, node):
             for attr in list(node.attributes.keys()):
                 attr = node.getAttributeNode(attr)
                 if attr.namespaceURI:
-                    attrs[(attr.namespaceURI,attr.localName)] = attr.value
+                    attrs[(attr.namespaceURI, attr.localName)] = attr.value
                 else:
-                    attrs[(None,attr.name)] = attr.value
+                    attrs[(None, attr.name)] = attr.value
             return (_base.ELEMENT, node.namespaceURI, node.nodeName,
                     attrs, node.hasChildNodes())
 
diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py
index a3cefdc6..6cf3bca7 100644
--- a/html5lib/treewalkers/etree.py
+++ b/html5lib/treewalkers/etree.py
@@ -3,17 +3,19 @@
 import gettext
 _ = gettext.gettext
 
-import copy
 import re
 
+from six import text_type
+
 from . import _base
-from html5lib.constants import voidElements
-from html5lib.utils import moduleFactorFactory
+from ..utils import moduleFactoryFactory
 
 tag_regexp = re.compile("{([^}]*)}(.*)")
 
+
 def getETreeBuilder(ElementTreeImplementation):
     ElementTree = ElementTreeImplementation
+    ElementTreeCommentType = ElementTree.Comment("asd").tag
 
     class TreeWalker(_base.NonRecursiveTreeWalker):
         """Given the particular ElementTree representation, this implementation,
@@ -30,7 +32,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
            text node; either the text or tail of the current element (1)
         """
         def getNodeDetails(self, node):
-            if isinstance(node, tuple): # It might be the root Element
+            if isinstance(node, tuple):  # It might be the root Element
                 elt, key, parents, flag = node
                 if flag in ("text", "tail"):
                     return _base.TEXT, getattr(elt, flag)
@@ -47,12 +49,12 @@ def getNodeDetails(self, node):
                 return (_base.DOCTYPE, node.text,
                         node.get("publicId"), node.get("systemId"))
 
-            elif node.tag == ElementTree.Comment:
+            elif node.tag == ElementTreeCommentType:
                 return _base.COMMENT, node.text
 
             else:
-                assert type(node.tag) in (str, str), type(node.tag)
-                #This is assumed to be an ordinary element
+                assert type(node.tag) == text_type, type(node.tag)
+                # This is assumed to be an ordinary element
                 match = tag_regexp.match(node.tag)
                 if match:
                     namespace, tag = match.groups()
@@ -63,9 +65,9 @@ def getNodeDetails(self, node):
                 for name, value in list(node.attrib.items()):
                     match = tag_regexp.match(name)
                     if match:
-                        attrs[(match.group(1),match.group(2))] = value
+                        attrs[(match.group(1), match.group(2))] = value
                     else:
-                        attrs[(None,name)] = value
+                        attrs[(None, name)] = value
                 return (_base.ELEMENT, namespace, tag,
                         attrs, len(node) or node.text)
 
@@ -102,7 +104,7 @@ def getNextSibling(self, node):
                 if element.tail and flag != "tail":
                     return element, key, parents, "tail"
                 elif key < len(parents[-1]) - 1:
-                    return parents[-1][key+1], key+1, parents, None
+                    return parents[-1][key + 1], key + 1, parents, None
                 else:
                     return None
 
diff --git a/html5lib/treewalkers/genshistream.py b/html5lib/treewalkers/genshistream.py
index 88ab225e..e57ce635 100644
--- a/html5lib/treewalkers/genshistream.py
+++ b/html5lib/treewalkers/genshistream.py
@@ -2,13 +2,13 @@
 
 from genshi.core import QName
 from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
-from genshi.core  import  START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
-from genshi.output import NamespaceFlattener
+from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
 
 from . import _base
 
 from html5lib.constants import voidElements, namespaces
 
+
 class TreeWalker(_base.TreeWalker):
     def __iter__(self):
         # Buffer the events so we can pass in the following one
@@ -61,8 +61,8 @@ def tokens(self, event, next):
         elif kind == DOCTYPE:
             yield self.doctype(*data)
 
-        elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, \
-          START_CDATA, END_CDATA, PI):
+        elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS,
+                      START_CDATA, END_CDATA, PI):
             pass
 
         else:
diff --git a/html5lib/treewalkers/lxmletree.py b/html5lib/treewalkers/lxmletree.py
index aa85e313..75c65afe 100644
--- a/html5lib/treewalkers/lxmletree.py
+++ b/html5lib/treewalkers/lxmletree.py
@@ -5,14 +5,13 @@
 from html5lib.treebuilders.etree import tag_regexp
 
 from gettext import gettext
-import sys
 _ = gettext
 
 from . import _base
 
-from html5lib.constants import voidElements
 from html5lib import ihatexml
 
+
 def ensure_str(s):
     if s is None:
         return None
@@ -21,6 +20,7 @@ def ensure_str(s):
     else:
         return s.decode("utf-8", "strict")
 
+
 class Root(object):
     def __init__(self, et):
         self.elementtree = et
@@ -51,6 +51,7 @@ def getnext(self):
     def __len__(self):
         return 1
 
+
 class Doctype(object):
     def __init__(self, root_node, name, public_id, system_id):
         self.root_node = root_node
@@ -64,6 +65,7 @@ def __init__(self, root_node, name, public_id, system_id):
     def getnext(self):
         return self.root_node.children[1]
 
+
 class FragmentRoot(Root):
     def __init__(self, children):
         self.children = [FragmentWrapper(self, child) for child in children]
@@ -72,6 +74,7 @@ def __init__(self, children):
     def getnext(self):
         return None
 
+
 class FragmentWrapper(object):
     def __init__(self, fragment_root, obj):
         self.root_node = fragment_root
@@ -129,7 +132,7 @@ def __init__(self, tree):
         self.filter = ihatexml.InfosetFilter()
 
     def getNodeDetails(self, node):
-        if isinstance(node, tuple): # Text node
+        if isinstance(node, tuple):  # Text node
             node, key = node
             assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
             return _base.TEXT, ensure_str(getattr(node, key))
@@ -147,10 +150,10 @@ def getNodeDetails(self, node):
             return _base.COMMENT, ensure_str(node.text)
 
         elif node.tag == etree.Entity:
-            return _base.ENTITY, ensure_str(node.text)[1:-1] # strip &;
+            return _base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;
 
         else:
-            #This is assumed to be an ordinary element
+            # This is assumed to be an ordinary element
             match = tag_regexp.match(ensure_str(node.tag))
             if match:
                 namespace, tag = match.groups()
@@ -163,9 +166,9 @@ def getNodeDetails(self, node):
                 value = ensure_str(value)
                 match = tag_regexp.match(name)
                 if match:
-                    attrs[(match.group(1),match.group(2))] = value
+                    attrs[(match.group(1), match.group(2))] = value
                 else:
-                    attrs[(None,name)] = value
+                    attrs[(None, name)] = value
             return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
                     attrs, len(node) > 0 or node.text)
 
@@ -179,7 +182,7 @@ def getFirstChild(self, node):
             return node[0]
 
     def getNextSibling(self, node):
-        if isinstance(node, tuple): # Text node
+        if isinstance(node, tuple):  # Text node
             node, key = node
             assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
             if key == "text":
@@ -189,13 +192,13 @@ def getNextSibling(self, node):
                     return node[0]
                 else:
                     return None
-            else: # tail
+            else:  # tail
                 return node.getnext()
 
         return (node, "tail") if node.tail else node.getnext()
 
     def getParentNode(self, node):
-        if isinstance(node, tuple): # Text node
+        if isinstance(node, tuple):  # Text node
             node, key = node
             assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
             if key == "text":
diff --git a/html5lib/treewalkers/pulldom.py b/html5lib/treewalkers/pulldom.py
index 12501093..0ecd9790 100644
--- a/html5lib/treewalkers/pulldom.py
+++ b/html5lib/treewalkers/pulldom.py
@@ -7,13 +7,14 @@
 
 from html5lib.constants import voidElements
 
+
 class TreeWalker(_base.TreeWalker):
     def __iter__(self):
         ignore_until = None
         previous = None
         for event in self.tree:
             if previous is not None and \
-              (ignore_until is None or previous[1] is ignore_until):
+                    (ignore_until is None or previous[1] is ignore_until):
                 if previous[1] is ignore_until:
                     ignore_until = None
                 for token in self.tokens(previous, event):
@@ -35,7 +36,7 @@ def tokens(self, event, next):
             attrs = {}
             for attr in list(node.attributes.keys()):
                 attr = node.getAttributeNode(attr)
-                attrs[(attr.namespaceURI,attr.localName)] = attr.value
+                attrs[(attr.namespaceURI, attr.localName)] = attr.value
             if name in voidElements:
                 for token in self.emptyTag(namespace,
                                            name,
diff --git a/html5lib/treewalkers/simpletree.py b/html5lib/treewalkers/simpletree.py
index 48202036..58c4c0a9 100644
--- a/html5lib/treewalkers/simpletree.py
+++ b/html5lib/treewalkers/simpletree.py
@@ -5,6 +5,7 @@
 
 from . import _base
 
+
 class TreeWalker(_base.NonRecursiveTreeWalker):
     """Given that simpletree has no performant way of getting a node's
     next sibling, this implementation returns "nodes" as tuples with the
@@ -19,38 +20,38 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
     """
 
     def getNodeDetails(self, node):
-        if isinstance(node, tuple): # It might be the root Node
+        if isinstance(node, tuple):  # It might be the root Node
             parent, idx, parents = node
             node = parent.childNodes[idx]
 
         # testing node.type allows us not to import treebuilders.simpletree
-        if node.type in (1, 2): # Document or DocumentFragment
+        if node.type in (1, 2):  # Document or DocumentFragment
             return (_base.DOCUMENT,)
 
-        elif node.type == 3: # DocumentType
+        elif node.type == 3:  # DocumentType
             return _base.DOCTYPE, node.name, node.publicId, node.systemId
 
-        elif node.type == 4: # TextNode
+        elif node.type == 4:  # TextNode
             return _base.TEXT, node.value
 
-        elif node.type == 5: # Element
+        elif node.type == 5:  # Element
             attrs = {}
             for name, value in list(node.attributes.items()):
                 if isinstance(name, tuple):
-                    attrs[(name[2],name[1])] = value
+                    attrs[(name[2], name[1])] = value
                 else:
-                    attrs[(None,name)] = value
+                    attrs[(None, name)] = value
             return (_base.ELEMENT, node.namespace, node.name,
                     attrs, node.hasContent())
 
-        elif node.type == 6: # CommentNode
+        elif node.type == 6:  # CommentNode
             return _base.COMMENT, node.data
 
         else:
-            return _node.UNKNOWN, node.type
+            return _base.UNKNOWN, node.type
 
     def getFirstChild(self, node):
-        if isinstance(node, tuple): # It might be the root Node
+        if isinstance(node, tuple):  # It might be the root Node
             parent, idx, parents = node
             parents.append((parent, idx))
             node = parent.childNodes[idx]
diff --git a/html5lib/trie/_base.py b/html5lib/trie/_base.py
index c4a4354d..724486b1 100644
--- a/html5lib/trie/_base.py
+++ b/html5lib/trie/_base.py
@@ -2,6 +2,7 @@
 
 from collections import Mapping
 
+
 class Trie(Mapping):
     """Abstract base class for tries"""
 
diff --git a/html5lib/trie/datrie.py b/html5lib/trie/datrie.py
index fc98bdc3..51f3d046 100644
--- a/html5lib/trie/datrie.py
+++ b/html5lib/trie/datrie.py
@@ -1,12 +1,11 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from itertools import chain
-
 from datrie import Trie as DATrie
 from six import text_type
 
 from ._base import Trie as ABCTrie
 
+
 class Trie(ABCTrie):
     def __init__(self, data):
         chars = set()
diff --git a/html5lib/trie/py.py b/html5lib/trie/py.py
index ec817d78..c2ba3da7 100644
--- a/html5lib/trie/py.py
+++ b/html5lib/trie/py.py
@@ -5,6 +5,7 @@
 
 from ._base import Trie as ABCTrie
 
+
 class Trie(ABCTrie):
     def __init__(self, data):
         if not all(isinstance(x, text_type) for x in data.keys()):
diff --git a/html5lib/utils.py b/html5lib/utils.py
index 8f5d5306..9841aebf 100644
--- a/html5lib/utils.py
+++ b/html5lib/utils.py
@@ -1,8 +1,8 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from sys import version_info
 from types import ModuleType
 
+
 class MethodDispatcher(dict):
     """Dict with 2 special properties:
 
@@ -21,7 +21,7 @@ def __init__(self, items=()):
         # twice as fast. Please do careful performance testing before changing
         # anything here.
         _dictEntries = []
-        for name,value in items:
+        for name, value in items:
             if type(name) in (list, tuple, frozenset, set):
                 for item in name:
                     _dictEntries.append((item, value))
@@ -34,14 +34,15 @@ def __getitem__(self, key):
         return dict.get(self, key, self.default)
 
 
-#Some utility functions to dal with weirdness around UCS2 vs UCS4
-#python builds
+# Some utility functions to deal with weirdness around UCS2 vs UCS4
+# python builds
 
 def isSurrogatePair(data):
     return (len(data) == 2 and
             ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and
             ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF)
 
+
 def surrogatePairToCodepoint(data):
     char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 +
                 (ord(data[1]) - 0xDC00))
@@ -50,10 +51,12 @@ def surrogatePairToCodepoint(data):
 # Module Factory Factory (no, this isn't Java, I know)
 # Here to stop this being duplicated all over the place.
 
+
 def moduleFactoryFactory(factory):
     moduleCache = {}
+
     def moduleFactory(baseModule, *args, **kwargs):
-        if type(ModuleType.__name__) is type(""):
+        if isinstance(ModuleType.__name__, type("")):
             name = "_%s_factory" % baseModule.__name__
         else:
             name = b"_%s_factory" % baseModule.__name__