From 6f7bb4a05254bb22de3b7c6dcc2ce69908fa8e4c Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sat, 13 Apr 2013 18:59:24 +0100
Subject: [PATCH 01/12] Placate pyflakes.

This is mostly just removing dead variables and unused imports; however,
there are a few substantial changes in here:

 - Move to using try/except ImportError/else in tests where we are
   checking whether some module exists, as the old try/except form
   was hiding genuine bugs that manifested themselves as ImportError
   (the etree treewalker was throwing ImportError when being
   imported).

 - Fix the ImportError the etree treewalker was throwing (this, too,
   was reported as a bug by pyflakes, thereby showing its value).
   However, given that the treewalker has been untested for a while,
   it is unsurprisingly broken, failing thousands of tests.

 - The tokenizer defined scriptDataDoubleEscapedDashState twice (the
   second definition should have been named
   scriptDataDoubleEscapedDashDashState); therefore, everything that
   should've been run in the dash state was in fact run using the
   dash-dash state's code.

This also adds flake8 to Travis, albeit running it without any PEP 8
errors showing.
---
 .travis.yml                           | 10 +++++++++
 html5lib/__init__.py                  |  2 ++
 html5lib/html5parser.py               | 16 ++++++--------
 html5lib/inputstream.py               |  5 +----
 html5lib/serializer/htmlserializer.py |  3 ---
 html5lib/tests/__init__.py            | 11 ---------
 html5lib/tests/support.py             |  8 +++----
 html5lib/tests/test_encoding.py       | 12 +++++-----
 html5lib/tests/test_parser.py         |  4 +---
 html5lib/tests/test_parser2.py        |  2 +-
 html5lib/tests/test_sanitizer.py      |  4 ----
 html5lib/tests/test_serializer.py     |  4 +---
 html5lib/tests/test_stream.py         |  2 +-
 html5lib/tests/test_tokenizer.py      |  4 ----
 html5lib/tests/test_treewalkers.py    | 32 ++++++++++-----------------
 html5lib/tests/tokenizertotree.py     |  1 -
 html5lib/tokenizer.py                 |  6 ++---
 html5lib/treebuilders/__init__.py     |  2 --
 html5lib/treebuilders/dom.py          |  3 +--
 html5lib/treebuilders/etree.py        | 20 ++++-------------
 html5lib/treewalkers/_base.py         |  4 +---
 html5lib/treewalkers/dom.py           |  1 -
 html5lib/treewalkers/etree.py         |  4 +---
 html5lib/treewalkers/genshistream.py  |  1 -
 html5lib/treewalkers/lxmletree.py     |  2 --
 html5lib/treewalkers/simpletree.py    |  2 +-
 html5lib/trie/datrie.py               |  2 --
 html5lib/utils.py                     |  1 -
 28 files changed, 56 insertions(+), 112 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index a48d27f5..8402ab18 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -10,6 +10,14 @@ env:
   - USE_OPTIONAL=true
   - USE_OPTIONAL=false
 
+matrix:
+  exclude:
+    - python: 3.3
+      env: USE_OPTIONAL=false
+  include:
+    - python: 3.3
+      env: USE_OPTIONAL=false FLAKE=true
+
 before_install:
   - git submodule update --init --recursive
 
@@ -19,9 +27,11 @@ install:
   - if [[ $TRAVIS_PYTHON_VERSION != 3.* && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-2.txt --use-mirrors; fi
   - if [[ $TRAVIS_PYTHON_VERSION == 3.* && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-3.txt --use-mirrors; fi
   - if [[ $TRAVIS_PYTHON_VERSION != "pypy" && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-cpython.txt --use-mirrors; fi
+  - if [[ $FLAKE == "true" ]]; then pip install --use-mirrors flake8; fi
 
 script:
   - nosetests
+  - if [[ $FLAKE == "true" ]]; then flake8 --exclude=E,W html5lib; fi
 
 after_script:
   - python debug-info.py
diff --git a/html5lib/__init__.py b/html5lib/__init__.py
index 528da9fa..10e2b74c 100644
--- a/html5lib/__init__.py
+++ b/html5lib/__init__.py
@@ -18,4 +18,6 @@
 from .treewalkers import getTreeWalker
 from .serializer import serialize
 
+__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
+           "getTreeWalker", "serialize"]
 __version__ = "1.0b1"
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 989691a4..9d319a5c 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -1,7 +1,6 @@
 from __future__ import absolute_import, division, unicode_literals
 from six import with_metaclass
 
-import sys
 import types
 
 from . import inputstream
@@ -14,10 +13,10 @@
 from . import utils
 from . import constants
 from .constants import spaceCharacters, asciiUpper2Lower
-from .constants import formattingElements, specialElements
-from .constants import headingElements, tableInsertModeElements
-from .constants import cdataElements, rcdataElements, voidElements
-from .constants import tokenTypes, ReparseException, namespaces, spaceCharacters
+from .constants import specialElements
+from .constants import headingElements
+from .constants import cdataElements, rcdataElements
+from .constants import tokenTypes, ReparseException, namespaces
 from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
 
 def parse(doc, treebuilder="simpletree", encoding=None,
@@ -88,7 +87,7 @@ def _parse(self, stream, innerHTML=False, container="div",
             try:
                 self.mainLoop()
                 break
-            except ReparseException as e:
+            except ReparseException:
                 self.reset()
 
     def reset(self):
@@ -405,7 +404,7 @@ def parseRCDataRawtext(self, token, contentType):
         """
         assert contentType in ("RAWTEXT", "RCDATA")
 
-        element = self.tree.insertElement(token)
+        self.tree.insertElement(token)
 
         if contentType == "RAWTEXT":
             self.tokenizer.state = self.tokenizer.rawtextState
@@ -1402,7 +1401,6 @@ def endTagFormatting(self, token):
             """The much-feared adoption agency algorithm"""
             # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
             # XXX Better parseError messages appreciated.
-            name = token["name"]
 
             # Step 1
             outerLoopCounter = 0
@@ -1620,7 +1618,7 @@ def endTagScript(self, token):
             #document.write works
 
         def endTagOther(self, token):
-            node = self.tree.openElements.pop()
+            self.tree.openElements.pop()
             self.parser.phase = self.parser.originalPhase
 
     class InTablePhase(Phase):
diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py
index ca2514e6..65875b85 100644
--- a/html5lib/inputstream.py
+++ b/html5lib/inputstream.py
@@ -3,7 +3,6 @@
 
 import codecs
 import re
-import types
 import sys
 
 from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
@@ -107,8 +106,7 @@ def _readFromBuffer(self, bytes):
                 bytesToRead = len(bufferedData) - bufferOffset
                 self.position = [bufferIndex, len(bufferedData)]
                 bufferIndex += 1
-            data = rv.append(bufferedData[bufferOffset:
-                                          bufferOffset + bytesToRead])
+            rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
             remainingBytes -= bytesToRead
 
             bufferOffset = 0
@@ -290,7 +288,6 @@ def characterErrorsUCS2(self, data):
         #Someone picked the wrong compile option
         #You lose
         skip = False
-        import sys
         for match in invalid_unicode_re.finditer(data):
             if skip:
                 continue
diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer/htmlserializer.py
index 550b4db2..ac6a4e41 100644
--- a/html5lib/serializer/htmlserializer.py
+++ b/html5lib/serializer/htmlserializer.py
@@ -24,8 +24,6 @@
 else:
     unicode_encode_errors = "htmlentityreplace"
 
-    from html5lib.constants import entities
-
     encode_entity_map = {}
     is_ucs4 = len("\U0010FFFF") == 1
     for k, v in list(entities.items()):
@@ -228,7 +226,6 @@ def serialize(self, treewalker, encoding=None):
                     in_cdata = True
                 elif in_cdata:
                     self.serializeError(_("Unexpected child element of a CDATA element"))
-                attributes = []
                 for (attr_namespace,attr_name),attr_value in sorted(token["data"].items()):
                     #TODO: Add namespace support here
                     k = attr_name
diff --git a/html5lib/tests/__init__.py b/html5lib/tests/__init__.py
index 903df92a..b8ce2de3 100644
--- a/html5lib/tests/__init__.py
+++ b/html5lib/tests/__init__.py
@@ -1,12 +1 @@
 from __future__ import absolute_import, division, unicode_literals
-
-import sys
-import os
-
-parent_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], ".."))
-
-if not parent_path in sys.path:
-    sys.path.insert(0, parent_path)
-del parent_path
-
-from . import support
diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
index c9c3236b..3dcdc39b 100644
--- a/html5lib/tests/support.py
+++ b/html5lib/tests/support.py
@@ -12,8 +12,7 @@
                                                 os.path.pardir,
                                                 os.path.pardir)))
 
-import html5lib
-from html5lib import html5parser, treebuilders
+from html5lib import treebuilders
 del base_path
 
 #Build a dict of avaliable trees
@@ -43,10 +42,11 @@
         pass
 
 try:
-    import lxml.etree as lxml
-    treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
+    import lxml.etree as lxml # flake8: noqa
 except ImportError:
     pass
+else:
+    treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
 
 def get_data_files(subdirectory, files='*.dat'):
     return glob.glob(os.path.join(test_dir,subdirectory,files))
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
index 74730e60..769e5a55 100644
--- a/html5lib/tests/test_encoding.py
+++ b/html5lib/tests/test_encoding.py
@@ -1,6 +1,5 @@
 from __future__ import absolute_import, division, unicode_literals
 
-import re
 import os
 import unittest
 
@@ -27,7 +26,7 @@ def test_codec_name_d(self):
 
 def runParserEncodingTest(data, encoding):
     p = HTMLParser()
-    t = p.parse(data, useChardet=False)
+    p.parse(data, useChardet=False)
     encoding = encoding.lower().decode("ascii")
 
     assert encoding == p.tokenizer.stream.charEncoding[0], errorMessage(data, encoding, p.tokenizer.stream.charEncoding[0])
@@ -44,18 +43,17 @@ def runPreScanEncodingTest(data, encoding):
 
 def test_encoding():
     for filename in get_data_files("encoding"):
-        test_name = os.path.basename(filename).replace('.dat',''). \
-            replace('-','')
         tests = TestData(filename, b"data", encoding=None)
         for idx, test in enumerate(tests):
             yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
             yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
 
 try:
-    import chardet
+    import chardet # flake8: noqa
+except ImportError:
+    print("chardet not found, skipping chardet tests")
+else:
     def test_chardet():
         data = open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt"), "rb").read()
         encoding = inputstream.HTMLInputStream(data).charEncoding
         assert encoding[0].lower() == "big5"
-except ImportError:
-    print("chardet not found, skipping chardet tests")
diff --git a/html5lib/tests/test_parser.py b/html5lib/tests/test_parser.py
index 0bcd9787..ae5b87fd 100644
--- a/html5lib/tests/test_parser.py
+++ b/html5lib/tests/test_parser.py
@@ -3,7 +3,6 @@
 import os
 import sys
 import traceback
-import io
 import warnings
 import re
 
@@ -11,8 +10,7 @@
 
 from .support import get_data_files
 from .support import TestData, convert, convertExpected, treeTypes
-import html5lib
-from html5lib import html5parser, treebuilders, constants
+from html5lib import html5parser, constants
 
 #Run the parse error checks
 checkParseErrors = False
diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
index 048f41dc..a3a58a2b 100755
--- a/html5lib/tests/test_parser2.py
+++ b/html5lib/tests/test_parser2.py
@@ -2,7 +2,7 @@
 
 import io
 
-from . import support
+from . import support # flake8: noqa
 from html5lib import html5parser
 from html5lib.constants import namespaces
 from html5lib.treebuilders import dom
diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py
index 2da80d39..ab5de5fe 100644
--- a/html5lib/tests/test_sanitizer.py
+++ b/html5lib/tests/test_sanitizer.py
@@ -1,9 +1,5 @@
 from __future__ import absolute_import, division, unicode_literals
 
-import os
-import sys
-import unittest
-
 try:
     import json
 except ImportError:
diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
index 21abc5ba..25eee1f0 100644
--- a/html5lib/tests/test_serializer.py
+++ b/html5lib/tests/test_serializer.py
@@ -1,6 +1,5 @@
 from __future__ import absolute_import, division, unicode_literals
 
-import os
 import unittest
 from .support import get_data_files
 
@@ -15,7 +14,7 @@
     unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
 
 import html5lib
-from html5lib import html5parser, serializer, constants
+from html5lib import serializer, constants
 from html5lib.treewalkers._base import TreeWalker
 
 optionals_loaded = []
@@ -172,6 +171,5 @@ def test_serializer():
     for filename in get_data_files('serializer', '*.test'):
         with open(filename) as fp:
             tests = json.load(fp)
-            test_name = os.path.basename(filename).replace('.test','')
             for index, test in enumerate(tests['tests']):
                 yield runSerializerTest, test["input"], test["expected"], test.get("options", {})
diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
index cc8035fd..cd4a8132 100755
--- a/html5lib/tests/test_stream.py
+++ b/html5lib/tests/test_stream.py
@@ -1,6 +1,6 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from . import support
+from . import support # flake8: noqa
 import unittest, codecs
 
 from html5lib.inputstream import HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream
diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py
index c1be14cf..ddbdf03b 100644
--- a/html5lib/tests/test_tokenizer.py
+++ b/html5lib/tests/test_tokenizer.py
@@ -2,9 +2,6 @@
 
 
 
-import sys
-import os
-import io
 import warnings
 import re
 
@@ -176,7 +173,6 @@ def testTokenizer():
     for filename in get_data_files('tokenizer', '*.test'):
         with open(filename) as fp:
             tests = json.load(fp)
-            testName = os.path.basename(filename).replace(".test","")
             if 'tests' in tests:
                 for index,test in enumerate(tests['tests']):
                     if 'initialStates' not in test:
diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index 566acf81..a09dde7a 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -14,7 +14,6 @@
 from .support import get_data_files, TestData, convertExpected
 
 from html5lib import html5parser, treewalkers, treebuilders, constants
-from html5lib.filters.lint import Filter as LintFilter, LintError
 
 def PullDOMAdapter(node):
     from xml.dom import Node
@@ -58,42 +57,35 @@ def PullDOMAdapter(node):
 #"supposed" to work
 try:
     import xml.etree.ElementTree as ElementTree
+except ImportError:
+    pass
+else:
     treeTypes['ElementTree'] = \
         {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
          "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
-except ImportError:
-    try:
-        import elementtree.ElementTree as ElementTree
-        treeTypes['ElementTree'] = \
-            {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
-             "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
-    except ImportError:
-        pass
 
 try:
     import xml.etree.cElementTree as ElementTree
+except ImportError:
+    pass
+else:
     treeTypes['cElementTree'] = \
         {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
          "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
-except ImportError:
-    try:
-        import cElementTree as ElementTree
-        treeTypes['cElementTree'] = \
-            {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
-             "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
-    except ImportError:
-        pass
+
 
 try:
-    import lxml.etree as ElementTree
+    import lxml.etree as ElementTree # flake8: noqa
+except ImportError:
+    pass
+else:
 #    treeTypes['lxml_as_etree'] = \
 #        {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
 #         "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
     treeTypes['lxml_native'] = \
         {"builder": treebuilders.getTreeBuilder("lxml"),
          "walker":  treewalkers.getTreeWalker("lxml")}
-except ImportError:
-    pass
+
 
 #Try whatever etree implementations are available from a list that are
 #"supposed" to work
diff --git a/html5lib/tests/tokenizertotree.py b/html5lib/tests/tokenizertotree.py
index ddcaa69f..8668561d 100644
--- a/html5lib/tests/tokenizertotree.py
+++ b/html5lib/tests/tokenizertotree.py
@@ -7,7 +7,6 @@
 
 import html5lib
 from . import support
-from . import test_parser
 from . import test_tokenizer
 
 p = html5lib.HTMLParser()
diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py
index 72d3057a..dd54eb67 100644
--- a/html5lib/tokenizer.py
+++ b/html5lib/tokenizer.py
@@ -8,8 +8,8 @@
 from collections import deque
 
 from .constants import spaceCharacters
-from .constants import entitiesWindows1252, entities
-from .constants import asciiLowercase, asciiLetters, asciiUpper2Lower
+from .constants import entities
+from .constants import asciiLetters, asciiUpper2Lower
 from .constants import digits, hexDigits, EOF
 from .constants import tokenTypes, tagTokenTypes
 from .constants import replacementCharacters
@@ -798,7 +798,7 @@ def scriptDataDoubleEscapedDashState(self):
             self.state = self.scriptDataDoubleEscapedState
         return True
 
-    def scriptDataDoubleEscapedDashState(self):
+    def scriptDataDoubleEscapedDashDashState(self):
         data = self.stream.char()
         if data == "-":
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"})
diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py
index 122fdc2e..e44e9914 100755
--- a/html5lib/treebuilders/__init__.py
+++ b/html5lib/treebuilders/__init__.py
@@ -34,8 +34,6 @@
 
 treeBuilderCache = {}
 
-import sys
-
 def getTreeBuilder(treeType, implementation=None, **kwargs):
     """Get a TreeBuilder class for various types of tree with built-in support
 
diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
index 7c6358b7..f48a53fe 100644
--- a/html5lib/treebuilders/dom.py
+++ b/html5lib/treebuilders/dom.py
@@ -2,11 +2,10 @@
 
 
 from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE
-import re
 import weakref
 
 from . import _base
-from html5lib import constants, ihatexml
+from html5lib import constants
 from html5lib.constants import namespaces
 from html5lib.utils import moduleFactoryFactory
 
diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
index 48c3ce7c..8dc9c86b 100755
--- a/html5lib/treebuilders/etree.py
+++ b/html5lib/treebuilders/etree.py
@@ -190,7 +190,6 @@ def __init__(self):
 
     def testSerializer(element):
         rv = []
-        finalText = None
         def serializeElement(element, indent=0):
             if not(hasattr(element, "tag")):
                 element = element.getroot()
@@ -204,10 +203,8 @@ def serializeElement(element, indent=0):
                     rv.append("<!DOCTYPE %s>"%(element.text,))
             elif element.tag == "DOCUMENT_ROOT":
                 rv.append("#document")
-                if element.text:
-                    rv.append("|%s\"%s\""%(' '*(indent+2), element.text))
-                if element.tail:
-                    finalText = element.tail
+                assert element.text is None
+                assert element.tail is None
             elif element.tag == ElementTreeCommentType:
                 rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
             else:
@@ -245,15 +242,11 @@ def serializeElement(element, indent=0):
                 rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail))
         serializeElement(element, 0)
 
-        if finalText is not None:
-            rv.append("|%s\"%s\""%(' '*2, finalText))
-
         return "\n".join(rv)
 
     def tostring(element):
         """Serialize an element and its child nodes to a string"""
         rv = []
-        finalText = None
         filter = ihatexml.InfosetFilter()
         def serializeElement(element):
             if type(element) == type(ElementTree.ElementTree):
@@ -268,10 +261,8 @@ def serializeElement(element):
                 else:
                     rv.append("<!DOCTYPE %s>"%(element.text,))
             elif element.tag == "DOCUMENT_ROOT":
-                if element.text:
-                    rv.append(element.text)
-                if element.tail:
-                    finalText = element.tail
+                assert element.text is None
+                assert element.tail is None
 
                 for child in element:
                     serializeElement(child)
@@ -300,9 +291,6 @@ def serializeElement(element):
 
         serializeElement(element)
 
-        if finalText is not None:
-            rv.append("%s\""%(' '*2, finalText))
-
         return "".join(rv)
 
     class TreeBuilder(_base.TreeBuilder):
diff --git a/html5lib/treewalkers/_base.py b/html5lib/treewalkers/_base.py
index 43c3f8de..69da1af6 100644
--- a/html5lib/treewalkers/_base.py
+++ b/html5lib/treewalkers/_base.py
@@ -95,7 +95,7 @@ def unknown(self, nodeType):
 
 class RecursiveTreeWalker(TreeWalker):
     def walkChildren(self, node):
-        raise NodeImplementedError
+        raise NotImplementedError
 
     def element(self, node, namespace, name, attrs, hasChildren):
         if name in voidElements:
@@ -137,7 +137,6 @@ def __iter__(self):
             details = self.getNodeDetails(currentNode)
             type, details = details[0], details[1:]
             hasChildren = False
-            endTag = None
 
             if type == DOCTYPE:
                 yield self.doctype(*details)
@@ -154,7 +153,6 @@ def __iter__(self):
                         yield token
                     hasChildren = False
                 else:
-                    endTag = name
                     yield self.startTag(namespace, name, attributes)
 
             elif type == COMMENT:
diff --git a/html5lib/treewalkers/dom.py b/html5lib/treewalkers/dom.py
index ddf4dc59..2739e7a4 100644
--- a/html5lib/treewalkers/dom.py
+++ b/html5lib/treewalkers/dom.py
@@ -6,7 +6,6 @@
 _ = gettext.gettext
 
 from . import _base
-from html5lib.constants import voidElements
 
 class TreeWalker(_base.NonRecursiveTreeWalker):
     def getNodeDetails(self, node):
diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py
index a3cefdc6..2006cdf0 100644
--- a/html5lib/treewalkers/etree.py
+++ b/html5lib/treewalkers/etree.py
@@ -3,12 +3,10 @@
 import gettext
 _ = gettext.gettext
 
-import copy
 import re
 
 from . import _base
-from html5lib.constants import voidElements
-from html5lib.utils import moduleFactorFactory
+from ..utils import moduleFactoryFactory
 
 tag_regexp = re.compile("{([^}]*)}(.*)")
 
diff --git a/html5lib/treewalkers/genshistream.py b/html5lib/treewalkers/genshistream.py
index 88ab225e..365d6aec 100644
--- a/html5lib/treewalkers/genshistream.py
+++ b/html5lib/treewalkers/genshistream.py
@@ -3,7 +3,6 @@
 from genshi.core import QName
 from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
 from genshi.core  import  START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
-from genshi.output import NamespaceFlattener
 
 from . import _base
 
diff --git a/html5lib/treewalkers/lxmletree.py b/html5lib/treewalkers/lxmletree.py
index aa85e313..186f9082 100644
--- a/html5lib/treewalkers/lxmletree.py
+++ b/html5lib/treewalkers/lxmletree.py
@@ -5,12 +5,10 @@
 from html5lib.treebuilders.etree import tag_regexp
 
 from gettext import gettext
-import sys
 _ = gettext
 
 from . import _base
 
-from html5lib.constants import voidElements
 from html5lib import ihatexml
 
 def ensure_str(s):
diff --git a/html5lib/treewalkers/simpletree.py b/html5lib/treewalkers/simpletree.py
index 48202036..a2abec85 100644
--- a/html5lib/treewalkers/simpletree.py
+++ b/html5lib/treewalkers/simpletree.py
@@ -47,7 +47,7 @@ def getNodeDetails(self, node):
             return _base.COMMENT, node.data
 
         else:
-            return _node.UNKNOWN, node.type
+            return _base.UNKNOWN, node.type
 
     def getFirstChild(self, node):
         if isinstance(node, tuple): # It might be the root Node
diff --git a/html5lib/trie/datrie.py b/html5lib/trie/datrie.py
index fc98bdc3..762b471f 100644
--- a/html5lib/trie/datrie.py
+++ b/html5lib/trie/datrie.py
@@ -1,7 +1,5 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from itertools import chain
-
 from datrie import Trie as DATrie
 from six import text_type
 
diff --git a/html5lib/utils.py b/html5lib/utils.py
index 8f5d5306..4363182b 100644
--- a/html5lib/utils.py
+++ b/html5lib/utils.py
@@ -1,6 +1,5 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from sys import version_info
 from types import ModuleType
 
 class MethodDispatcher(dict):

From e5b123efef2382d9939c501c2cb9ef248b360210 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sat, 13 Apr 2013 19:10:46 +0100
Subject: [PATCH 02/12] Fix assertion in etree treewalker, thereby making tests
 pass again.

---
 html5lib/treewalkers/etree.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py
index 2006cdf0..57de4aa9 100644
--- a/html5lib/treewalkers/etree.py
+++ b/html5lib/treewalkers/etree.py
@@ -5,6 +5,8 @@
 
 import re
 
+from six import text_type
+
 from . import _base
 from ..utils import moduleFactoryFactory
 
@@ -49,7 +51,7 @@ def getNodeDetails(self, node):
                 return _base.COMMENT, node.text
 
             else:
-                assert type(node.tag) in (str, str), type(node.tag)
+                assert type(node.tag) == text_type, type(node.tag)
                 #This is assumed to be an ordinary element
                 match = tag_regexp.match(node.tag)
                 if match:

From 627969f2f6ad48ba02c6cb3b26bded04414b1933 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sat, 13 Apr 2013 20:27:05 +0100
Subject: [PATCH 03/12] (Almost) comply with PEP 8.

The two violations:

 - We don't, in general, comply with the 79-character line-length limit.

 - constants.py violates (at least according to pep8) the hanging
   indent rule. On the whole, I disagree with the tool and have filed
   <https://github.com/jcrocholl/pep8/issues/189> with regard to
   this.
---
 .travis.yml                                 |   3 +-
 html5lib/constants.py                       | 465 ++++++++---------
 html5lib/filters/inject_meta_charset.py     |  31 +-
 html5lib/filters/lint.py                    |   5 +-
 html5lib/filters/optionaltags.py            |   5 +-
 html5lib/filters/sanitizer.py               |   4 +-
 html5lib/filters/whitespace.py              |   5 +-
 html5lib/html5parser.py                     | 540 ++++++++++----------
 html5lib/ihatexml.py                        | 130 ++++-
 html5lib/inputstream.py                     | 154 +++---
 html5lib/sanitizer.py                       | 273 +++++-----
 html5lib/serializer/htmlserializer.py       |  36 +-
 html5lib/tests/mockParser.py                |   6 +-
 html5lib/tests/performance/concatenation.py |   4 +
 html5lib/tests/support.py                   |  29 +-
 html5lib/tests/test_encoding.py             |   8 +-
 html5lib/tests/test_parser.py               |  33 +-
 html5lib/tests/test_parser2.py              |  46 +-
 html5lib/tests/test_sanitizer.py            |  54 +-
 html5lib/tests/test_serializer.py           |  15 +-
 html5lib/tests/test_stream.py               |  12 +-
 html5lib/tests/test_tokenizer.py            |  30 +-
 html5lib/tests/test_treewalkers.py          | 101 ++--
 html5lib/tests/test_whitespace_filter.py    |  25 +-
 html5lib/tests/tokenizertotree.py           |  15 +-
 html5lib/tokenizer.py                       | 246 +++++----
 html5lib/treebuilders/__init__.py           |   7 +-
 html5lib/treebuilders/_base.py              |  77 +--
 html5lib/treebuilders/dom.py                | 164 +++---
 html5lib/treebuilders/etree.py              |  76 +--
 html5lib/treebuilders/etree_lxml.py         | 100 ++--
 html5lib/treebuilders/simpletree.py         |  43 +-
 html5lib/treewalkers/__init__.py            |   1 +
 html5lib/treewalkers/_base.py               |  23 +-
 html5lib/treewalkers/dom.py                 |   5 +-
 html5lib/treewalkers/etree.py               |  11 +-
 html5lib/treewalkers/genshistream.py        |   7 +-
 html5lib/treewalkers/lxmletree.py           |  21 +-
 html5lib/treewalkers/pulldom.py             |   5 +-
 html5lib/treewalkers/simpletree.py          |  19 +-
 html5lib/trie/_base.py                      |   1 +
 html5lib/trie/datrie.py                     |   1 +
 html5lib/trie/py.py                         |   1 +
 html5lib/utils.py                           |  12 +-
 44 files changed, 1531 insertions(+), 1318 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 8402ab18..262df222 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,7 +31,8 @@ install:
 
 script:
   - nosetests
-  - if [[ $FLAKE == "true" ]]; then flake8 --exclude=E,W html5lib; fi
+  - if [[ $FLAKE == "true" ]]; then find html5lib/ -name '*.py' -and -not -name 'constants.py' -print0 | xargs -0 flake8 --ignore=E501; fi
+  - if [[ $FLAKE == "true" ]]; then flake8 --max-line-length=99 --ignore=E126 html5lib/constants.py; fi
 
 after_script:
   - python debug-info.py
diff --git a/html5lib/constants.py b/html5lib/constants.py
index 952fef41..1866dd78 100644
--- a/html5lib/constants.py
+++ b/html5lib/constants.py
@@ -1,300 +1,301 @@
 from __future__ import absolute_import, division, unicode_literals
 
-import string, gettext
+import string
+import gettext
 _ = gettext.gettext
 
 EOF = None
 
 E = {
     "null-character":
-       _("Null character in input stream, replaced with U+FFFD."),
+        _("Null character in input stream, replaced with U+FFFD."),
     "invalid-codepoint":
-       _("Invalid codepoint in stream."),
+        _("Invalid codepoint in stream."),
     "incorrectly-placed-solidus":
-       _("Solidus (/) incorrectly placed in tag."),
+        _("Solidus (/) incorrectly placed in tag."),
     "incorrect-cr-newline-entity":
-       _("Incorrect CR newline entity, replaced with LF."),
+        _("Incorrect CR newline entity, replaced with LF."),
     "illegal-windows-1252-entity":
-       _("Entity used with illegal number (windows-1252 reference)."),
+        _("Entity used with illegal number (windows-1252 reference)."),
     "cant-convert-numeric-entity":
-       _("Numeric entity couldn't be converted to character "
-         "(codepoint U+%(charAsInt)08x)."),
+        _("Numeric entity couldn't be converted to character "
+          "(codepoint U+%(charAsInt)08x)."),
     "illegal-codepoint-for-numeric-entity":
-       _("Numeric entity represents an illegal codepoint: "
-         "U+%(charAsInt)08x."),
+        _("Numeric entity represents an illegal codepoint: "
+          "U+%(charAsInt)08x."),
     "numeric-entity-without-semicolon":
-       _("Numeric entity didn't end with ';'."),
+        _("Numeric entity didn't end with ';'."),
     "expected-numeric-entity-but-got-eof":
-       _("Numeric entity expected. Got end of file instead."),
+        _("Numeric entity expected. Got end of file instead."),
     "expected-numeric-entity":
-       _("Numeric entity expected but none found."),
+        _("Numeric entity expected but none found."),
     "named-entity-without-semicolon":
-       _("Named entity didn't end with ';'."),
+        _("Named entity didn't end with ';'."),
     "expected-named-entity":
-       _("Named entity expected. Got none."),
+        _("Named entity expected. Got none."),
     "attributes-in-end-tag":
-       _("End tag contains unexpected attributes."),
+        _("End tag contains unexpected attributes."),
     'self-closing-flag-on-end-tag':
         _("End tag contains unexpected self-closing flag."),
     "expected-tag-name-but-got-right-bracket":
-       _("Expected tag name. Got '>' instead."),
+        _("Expected tag name. Got '>' instead."),
     "expected-tag-name-but-got-question-mark":
-       _("Expected tag name. Got '?' instead. (HTML doesn't "
-         "support processing instructions.)"),
+        _("Expected tag name. Got '?' instead. (HTML doesn't "
+          "support processing instructions.)"),
     "expected-tag-name":
-       _("Expected tag name. Got something else instead"),
+        _("Expected tag name. Got something else instead"),
     "expected-closing-tag-but-got-right-bracket":
-       _("Expected closing tag. Got '>' instead. Ignoring '</>'."),
+        _("Expected closing tag. Got '>' instead. Ignoring '</>'."),
     "expected-closing-tag-but-got-eof":
-       _("Expected closing tag. Unexpected end of file."),
+        _("Expected closing tag. Unexpected end of file."),
     "expected-closing-tag-but-got-char":
-       _("Expected closing tag. Unexpected character '%(data)s' found."),
+        _("Expected closing tag. Unexpected character '%(data)s' found."),
     "eof-in-tag-name":
-       _("Unexpected end of file in the tag name."),
+        _("Unexpected end of file in the tag name."),
     "expected-attribute-name-but-got-eof":
-       _("Unexpected end of file. Expected attribute name instead."),
+        _("Unexpected end of file. Expected attribute name instead."),
     "eof-in-attribute-name":
-       _("Unexpected end of file in attribute name."),
+        _("Unexpected end of file in attribute name."),
     "invalid-character-in-attribute-name":
         _("Invalid character in attribute name"),
     "duplicate-attribute":
-       _("Dropped duplicate attribute on tag."),
+        _("Dropped duplicate attribute on tag."),
     "expected-end-of-tag-name-but-got-eof":
-       _("Unexpected end of file. Expected = or end of tag."),
+        _("Unexpected end of file. Expected = or end of tag."),
     "expected-attribute-value-but-got-eof":
-       _("Unexpected end of file. Expected attribute value."),
+        _("Unexpected end of file. Expected attribute value."),
     "expected-attribute-value-but-got-right-bracket":
-       _("Expected attribute value. Got '>' instead."),
+        _("Expected attribute value. Got '>' instead."),
     'equals-in-unquoted-attribute-value':
         _("Unexpected = in unquoted attribute"),
     'unexpected-character-in-unquoted-attribute-value':
         _("Unexpected character in unquoted attribute"),
     "invalid-character-after-attribute-name":
-       _("Unexpected character after attribute name."),
+        _("Unexpected character after attribute name."),
     "unexpected-character-after-attribute-value":
-       _("Unexpected character after attribute value."),
+        _("Unexpected character after attribute value."),
     "eof-in-attribute-value-double-quote":
-       _("Unexpected end of file in attribute value (\")."),
+        _("Unexpected end of file in attribute value (\")."),
     "eof-in-attribute-value-single-quote":
-       _("Unexpected end of file in attribute value (')."),
+        _("Unexpected end of file in attribute value (')."),
     "eof-in-attribute-value-no-quotes":
-       _("Unexpected end of file in attribute value."),
+        _("Unexpected end of file in attribute value."),
     "unexpected-EOF-after-solidus-in-tag":
         _("Unexpected end of file in tag. Expected >"),
     "unexpected-character-after-solidus-in-tag":
         _("Unexpected character after / in tag. Expected >"),
     "expected-dashes-or-doctype":
-       _("Expected '--' or 'DOCTYPE'. Not found."),
+        _("Expected '--' or 'DOCTYPE'. Not found."),
     "unexpected-bang-after-double-dash-in-comment":
         _("Unexpected ! after -- in comment"),
     "unexpected-space-after-double-dash-in-comment":
         _("Unexpected space after -- in comment"),
     "incorrect-comment":
-       _("Incorrect comment."),
+        _("Incorrect comment."),
     "eof-in-comment":
-       _("Unexpected end of file in comment."),
+        _("Unexpected end of file in comment."),
     "eof-in-comment-end-dash":
-       _("Unexpected end of file in comment (-)"),
+        _("Unexpected end of file in comment (-)"),
     "unexpected-dash-after-double-dash-in-comment":
-       _("Unexpected '-' after '--' found in comment."),
+        _("Unexpected '-' after '--' found in comment."),
     "eof-in-comment-double-dash":
-       _("Unexpected end of file in comment (--)."),
+        _("Unexpected end of file in comment (--)."),
     "eof-in-comment-end-space-state":
-       _("Unexpected end of file in comment."),
+        _("Unexpected end of file in comment."),
     "eof-in-comment-end-bang-state":
-       _("Unexpected end of file in comment."),
+        _("Unexpected end of file in comment."),
     "unexpected-char-in-comment":
-       _("Unexpected character in comment found."),
+        _("Unexpected character in comment found."),
     "need-space-after-doctype":
-       _("No space after literal string 'DOCTYPE'."),
+        _("No space after literal string 'DOCTYPE'."),
     "expected-doctype-name-but-got-right-bracket":
-       _("Unexpected > character. Expected DOCTYPE name."),
+        _("Unexpected > character. Expected DOCTYPE name."),
     "expected-doctype-name-but-got-eof":
-       _("Unexpected end of file. Expected DOCTYPE name."),
+        _("Unexpected end of file. Expected DOCTYPE name."),
     "eof-in-doctype-name":
-       _("Unexpected end of file in DOCTYPE name."),
+        _("Unexpected end of file in DOCTYPE name."),
     "eof-in-doctype":
-       _("Unexpected end of file in DOCTYPE."),
+        _("Unexpected end of file in DOCTYPE."),
     "expected-space-or-right-bracket-in-doctype":
-       _("Expected space or '>'. Got '%(data)s'"),
+        _("Expected space or '>'. Got '%(data)s'"),
     "unexpected-end-of-doctype":
-       _("Unexpected end of DOCTYPE."),
+        _("Unexpected end of DOCTYPE."),
     "unexpected-char-in-doctype":
-       _("Unexpected character in DOCTYPE."),
+        _("Unexpected character in DOCTYPE."),
     "eof-in-innerhtml":
-       _("XXX innerHTML EOF"),
+        _("XXX innerHTML EOF"),
     "unexpected-doctype":
-       _("Unexpected DOCTYPE. Ignored."),
+        _("Unexpected DOCTYPE. Ignored."),
     "non-html-root":
-       _("html needs to be the first start tag."),
+        _("html needs to be the first start tag."),
     "expected-doctype-but-got-eof":
-       _("Unexpected End of file. Expected DOCTYPE."),
+        _("Unexpected End of file. Expected DOCTYPE."),
     "unknown-doctype":
-       _("Erroneous DOCTYPE."),
+        _("Erroneous DOCTYPE."),
     "expected-doctype-but-got-chars":
-       _("Unexpected non-space characters. Expected DOCTYPE."),
+        _("Unexpected non-space characters. Expected DOCTYPE."),
     "expected-doctype-but-got-start-tag":
-       _("Unexpected start tag (%(name)s). Expected DOCTYPE."),
+        _("Unexpected start tag (%(name)s). Expected DOCTYPE."),
     "expected-doctype-but-got-end-tag":
-       _("Unexpected end tag (%(name)s). Expected DOCTYPE."),
+        _("Unexpected end tag (%(name)s). Expected DOCTYPE."),
     "end-tag-after-implied-root":
-       _("Unexpected end tag (%(name)s) after the (implied) root element."),
+        _("Unexpected end tag (%(name)s) after the (implied) root element."),
     "expected-named-closing-tag-but-got-eof":
-       _("Unexpected end of file. Expected end tag (%(name)s)."),
+        _("Unexpected end of file. Expected end tag (%(name)s)."),
     "two-heads-are-not-better-than-one":
-       _("Unexpected start tag head in existing head. Ignored."),
+        _("Unexpected start tag head in existing head. Ignored."),
     "unexpected-end-tag":
-       _("Unexpected end tag (%(name)s). Ignored."),
+        _("Unexpected end tag (%(name)s). Ignored."),
     "unexpected-start-tag-out-of-my-head":
-       _("Unexpected start tag (%(name)s) that can be in head. Moved."),
+        _("Unexpected start tag (%(name)s) that can be in head. Moved."),
     "unexpected-start-tag":
-       _("Unexpected start tag (%(name)s)."),
+        _("Unexpected start tag (%(name)s)."),
     "missing-end-tag":
-       _("Missing end tag (%(name)s)."),
+        _("Missing end tag (%(name)s)."),
     "missing-end-tags":
-       _("Missing end tags (%(name)s)."),
+        _("Missing end tags (%(name)s)."),
     "unexpected-start-tag-implies-end-tag":
-       _("Unexpected start tag (%(startName)s) "
-         "implies end tag (%(endName)s)."),
+        _("Unexpected start tag (%(startName)s) "
+          "implies end tag (%(endName)s)."),
     "unexpected-start-tag-treated-as":
-       _("Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
+        _("Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
     "deprecated-tag":
-       _("Unexpected start tag %(name)s. Don't use it!"),
+        _("Unexpected start tag %(name)s. Don't use it!"),
     "unexpected-start-tag-ignored":
-       _("Unexpected start tag %(name)s. Ignored."),
+        _("Unexpected start tag %(name)s. Ignored."),
     "expected-one-end-tag-but-got-another":
-       _("Unexpected end tag (%(gotName)s). "
-         "Missing end tag (%(expectedName)s)."),
+        _("Unexpected end tag (%(gotName)s). "
+          "Missing end tag (%(expectedName)s)."),
     "end-tag-too-early":
-       _("End tag (%(name)s) seen too early. Expected other end tag."),
+        _("End tag (%(name)s) seen too early. Expected other end tag."),
     "end-tag-too-early-named":
-       _("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
+        _("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
     "end-tag-too-early-ignored":
-       _("End tag (%(name)s) seen too early. Ignored."),
+        _("End tag (%(name)s) seen too early. Ignored."),
     "adoption-agency-1.1":
-       _("End tag (%(name)s) violates step 1, "
-         "paragraph 1 of the adoption agency algorithm."),
+        _("End tag (%(name)s) violates step 1, "
+          "paragraph 1 of the adoption agency algorithm."),
     "adoption-agency-1.2":
-       _("End tag (%(name)s) violates step 1, "
-         "paragraph 2 of the adoption agency algorithm."),
+        _("End tag (%(name)s) violates step 1, "
+          "paragraph 2 of the adoption agency algorithm."),
     "adoption-agency-1.3":
-       _("End tag (%(name)s) violates step 1, "
-         "paragraph 3 of the adoption agency algorithm."),
+        _("End tag (%(name)s) violates step 1, "
+          "paragraph 3 of the adoption agency algorithm."),
     "adoption-agency-4.4":
-       _("End tag (%(name)s) violates step 4, "
-         "paragraph 4 of the adoption agency algorithm."),
+        _("End tag (%(name)s) violates step 4, "
+          "paragraph 4 of the adoption agency algorithm."),
     "unexpected-end-tag-treated-as":
-       _("Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
+        _("Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
     "no-end-tag":
-       _("This element (%(name)s) has no end tag."),
+        _("This element (%(name)s) has no end tag."),
     "unexpected-implied-end-tag-in-table":
-       _("Unexpected implied end tag (%(name)s) in the table phase."),
+        _("Unexpected implied end tag (%(name)s) in the table phase."),
     "unexpected-implied-end-tag-in-table-body":
-       _("Unexpected implied end tag (%(name)s) in the table body phase."),
+        _("Unexpected implied end tag (%(name)s) in the table body phase."),
     "unexpected-char-implies-table-voodoo":
-       _("Unexpected non-space characters in "
-         "table context caused voodoo mode."),
+        _("Unexpected non-space characters in "
+          "table context caused voodoo mode."),
     "unexpected-hidden-input-in-table":
-       _("Unexpected input with type hidden in table context."),
+        _("Unexpected input with type hidden in table context."),
     "unexpected-form-in-table":
-       _("Unexpected form in table context."),
+        _("Unexpected form in table context."),
     "unexpected-start-tag-implies-table-voodoo":
-       _("Unexpected start tag (%(name)s) in "
-         "table context caused voodoo mode."),
+        _("Unexpected start tag (%(name)s) in "
+          "table context caused voodoo mode."),
     "unexpected-end-tag-implies-table-voodoo":
-       _("Unexpected end tag (%(name)s) in "
-         "table context caused voodoo mode."),
+        _("Unexpected end tag (%(name)s) in "
+          "table context caused voodoo mode."),
     "unexpected-cell-in-table-body":
-       _("Unexpected table cell start tag (%(name)s) "
-         "in the table body phase."),
+        _("Unexpected table cell start tag (%(name)s) "
+          "in the table body phase."),
     "unexpected-cell-end-tag":
-       _("Got table cell end tag (%(name)s) "
-         "while required end tags are missing."),
+        _("Got table cell end tag (%(name)s) "
+          "while required end tags are missing."),
     "unexpected-end-tag-in-table-body":
-       _("Unexpected end tag (%(name)s) in the table body phase. Ignored."),
+        _("Unexpected end tag (%(name)s) in the table body phase. Ignored."),
     "unexpected-implied-end-tag-in-table-row":
-       _("Unexpected implied end tag (%(name)s) in the table row phase."),
+        _("Unexpected implied end tag (%(name)s) in the table row phase."),
     "unexpected-end-tag-in-table-row":
-       _("Unexpected end tag (%(name)s) in the table row phase. Ignored."),
+        _("Unexpected end tag (%(name)s) in the table row phase. Ignored."),
     "unexpected-select-in-select":
-       _("Unexpected select start tag in the select phase "
-         "treated as select end tag."),
+        _("Unexpected select start tag in the select phase "
+          "treated as select end tag."),
     "unexpected-input-in-select":
-       _("Unexpected input start tag in the select phase."),
+        _("Unexpected input start tag in the select phase."),
     "unexpected-start-tag-in-select":
-       _("Unexpected start tag token (%(name)s in the select phase. "
-         "Ignored."),
+        _("Unexpected start tag token (%(name)s in the select phase. "
+          "Ignored."),
     "unexpected-end-tag-in-select":
-       _("Unexpected end tag (%(name)s) in the select phase. Ignored."),
+        _("Unexpected end tag (%(name)s) in the select phase. Ignored."),
     "unexpected-table-element-start-tag-in-select-in-table":
-       _("Unexpected table element start tag (%(name)s) in the select in table phase."),
+        _("Unexpected table element start tag (%(name)s) in the select in table phase."),
     "unexpected-table-element-end-tag-in-select-in-table":
-       _("Unexpected table element end tag (%(name)s) in the select in table phase."),
+        _("Unexpected table element end tag (%(name)s) in the select in table phase."),
     "unexpected-char-after-body":
-       _("Unexpected non-space characters in the after body phase."),
+        _("Unexpected non-space characters in the after body phase."),
     "unexpected-start-tag-after-body":
-       _("Unexpected start tag token (%(name)s)"
-         " in the after body phase."),
+        _("Unexpected start tag token (%(name)s)"
+          " in the after body phase."),
     "unexpected-end-tag-after-body":
-       _("Unexpected end tag token (%(name)s)"
-         " in the after body phase."),
+        _("Unexpected end tag token (%(name)s)"
+          " in the after body phase."),
     "unexpected-char-in-frameset":
-       _("Unexpected characters in the frameset phase. Characters ignored."),
+        _("Unexpected characters in the frameset phase. Characters ignored."),
     "unexpected-start-tag-in-frameset":
-       _("Unexpected start tag token (%(name)s)"
-         " in the frameset phase. Ignored."),
+        _("Unexpected start tag token (%(name)s)"
+          " in the frameset phase. Ignored."),
     "unexpected-frameset-in-frameset-innerhtml":
-       _("Unexpected end tag token (frameset) "
-         "in the frameset phase (innerHTML)."),
+        _("Unexpected end tag token (frameset) "
+          "in the frameset phase (innerHTML)."),
     "unexpected-end-tag-in-frameset":
-       _("Unexpected end tag token (%(name)s)"
-         " in the frameset phase. Ignored."),
+        _("Unexpected end tag token (%(name)s)"
+          " in the frameset phase. Ignored."),
     "unexpected-char-after-frameset":
-       _("Unexpected non-space characters in the "
-         "after frameset phase. Ignored."),
+        _("Unexpected non-space characters in the "
+          "after frameset phase. Ignored."),
     "unexpected-start-tag-after-frameset":
-       _("Unexpected start tag (%(name)s)"
-         " in the after frameset phase. Ignored."),
+        _("Unexpected start tag (%(name)s)"
+          " in the after frameset phase. Ignored."),
     "unexpected-end-tag-after-frameset":
-       _("Unexpected end tag (%(name)s)"
-         " in the after frameset phase. Ignored."),
+        _("Unexpected end tag (%(name)s)"
+          " in the after frameset phase. Ignored."),
     "unexpected-end-tag-after-body-innerhtml":
-       _("Unexpected end tag after body(innerHtml)"),
+        _("Unexpected end tag after body(innerHtml)"),
     "expected-eof-but-got-char":
-       _("Unexpected non-space characters. Expected end of file."),
+        _("Unexpected non-space characters. Expected end of file."),
     "expected-eof-but-got-start-tag":
-       _("Unexpected start tag (%(name)s)"
-         ". Expected end of file."),
+        _("Unexpected start tag (%(name)s)"
+          ". Expected end of file."),
     "expected-eof-but-got-end-tag":
-       _("Unexpected end tag (%(name)s)"
-         ". Expected end of file."),
+        _("Unexpected end tag (%(name)s)"
+          ". Expected end of file."),
     "eof-in-table":
-       _("Unexpected end of file. Expected table content."),
+        _("Unexpected end of file. Expected table content."),
     "eof-in-select":
-       _("Unexpected end of file. Expected select content."),
+        _("Unexpected end of file. Expected select content."),
     "eof-in-frameset":
-       _("Unexpected end of file. Expected frameset content."),
+        _("Unexpected end of file. Expected frameset content."),
     "eof-in-script-in-script":
-       _("Unexpected end of file. Expected script content."),
+        _("Unexpected end of file. Expected script content."),
     "eof-in-foreign-lands":
-       _("Unexpected end of file. Expected foreign content"),
+        _("Unexpected end of file. Expected foreign content"),
     "non-void-element-with-trailing-solidus":
-       _("Trailing solidus not allowed on element %(name)s"),
+        _("Trailing solidus not allowed on element %(name)s"),
     "unexpected-html-element-in-foreign-content":
-       _("Element %(name)s not allowed in a non-html context"),
+        _("Element %(name)s not allowed in a non-html context"),
     "unexpected-end-tag-before-html":
         _("Unexpected end tag (%(name)s) before html."),
     "XXX-undefined-error":
-        ("Undefined error (this sucks and should be fixed)"),
+        _("Undefined error (this sucks and should be fixed)"),
 }
 
 namespaces = {
-    "html":"http://www.w3.org/1999/xhtml",
-    "mathml":"http://www.w3.org/1998/Math/MathML",
-    "svg":"http://www.w3.org/2000/svg",
-    "xlink":"http://www.w3.org/1999/xlink",
-    "xml":"http://www.w3.org/XML/1998/namespace",
-    "xmlns":"http://www.w3.org/2000/xmlns/"
+    "html": "http://www.w3.org/1999/xhtml",
+    "mathml": "http://www.w3.org/1998/Math/MathML",
+    "svg": "http://www.w3.org/2000/svg",
+    "xlink": "http://www.w3.org/1999/xlink",
+    "xml": "http://www.w3.org/XML/1998/namespace",
+    "xmlns": "http://www.w3.org/2000/xmlns/"
 }
 
 scopingElements = frozenset((
@@ -454,8 +455,8 @@
 digits = frozenset(string.digits)
 hexDigits = frozenset(string.hexdigits)
 
-asciiUpper2Lower = dict([(ord(c),ord(c.lower()))
-    for c in string.ascii_uppercase])
+asciiUpper2Lower = dict([(ord(c), ord(c.lower()))
+                         for c in string.ascii_uppercase])
 
 # Heading elements need to be ordered
 headingElements = (
@@ -501,8 +502,8 @@
     "": frozenset(("irrelevant",)),
     "style": frozenset(("scoped",)),
     "img": frozenset(("ismap",)),
-    "audio": frozenset(("autoplay","controls")),
-    "video": frozenset(("autoplay","controls")),
+    "audio": frozenset(("autoplay", "controls")),
+    "video": frozenset(("autoplay", "controls")),
     "script": frozenset(("defer", "async")),
     "details": frozenset(("open",)),
     "datagrid": frozenset(("multiple", "disabled")),
@@ -521,38 +522,38 @@
 # entitiesWindows1252 has to be _ordered_ and needs to have an index. It
 # therefore can't be a frozenset.
 entitiesWindows1252 = (
-    8364,  # 0x80  0x20AC  EURO SIGN
-    65533, # 0x81          UNDEFINED
-    8218,  # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
-    402,   # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
-    8222,  # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
-    8230,  # 0x85  0x2026  HORIZONTAL ELLIPSIS
-    8224,  # 0x86  0x2020  DAGGER
-    8225,  # 0x87  0x2021  DOUBLE DAGGER
-    710,   # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
-    8240,  # 0x89  0x2030  PER MILLE SIGN
-    352,   # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
-    8249,  # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    338,   # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
-    65533, # 0x8D          UNDEFINED
-    381,   # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
-    65533, # 0x8F          UNDEFINED
-    65533, # 0x90          UNDEFINED
-    8216,  # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
-    8217,  # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
-    8220,  # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
-    8221,  # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
-    8226,  # 0x95  0x2022  BULLET
-    8211,  # 0x96  0x2013  EN DASH
-    8212,  # 0x97  0x2014  EM DASH
-    732,   # 0x98  0x02DC  SMALL TILDE
-    8482,  # 0x99  0x2122  TRADE MARK SIGN
-    353,   # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
-    8250,  # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    339,   # 0x9C  0x0153  LATIN SMALL LIGATURE OE
-    65533, # 0x9D          UNDEFINED
-    382,   # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
-    376    # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
+    8364,   # 0x80  0x20AC  EURO SIGN
+    65533,  # 0x81          UNDEFINED
+    8218,   # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
+    402,    # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
+    8222,   # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
+    8230,   # 0x85  0x2026  HORIZONTAL ELLIPSIS
+    8224,   # 0x86  0x2020  DAGGER
+    8225,   # 0x87  0x2021  DOUBLE DAGGER
+    710,    # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
+    8240,   # 0x89  0x2030  PER MILLE SIGN
+    352,    # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
+    8249,   # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    338,    # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
+    65533,  # 0x8D          UNDEFINED
+    381,    # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
+    65533,  # 0x8F          UNDEFINED
+    65533,  # 0x90          UNDEFINED
+    8216,   # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
+    8217,   # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
+    8220,   # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
+    8221,   # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
+    8226,   # 0x95  0x2022  BULLET
+    8211,   # 0x96  0x2013  EN DASH
+    8212,   # 0x97  0x2014  EM DASH
+    732,    # 0x98  0x02DC  SMALL TILDE
+    8482,   # 0x99  0x2122  TRADE MARK SIGN
+    353,    # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
+    8250,   # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    339,    # 0x9C  0x0153  LATIN SMALL LIGATURE OE
+    65533,  # 0x9D          UNDEFINED
+    382,    # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
+    376     # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
 )
 
 xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;'))
@@ -2792,41 +2793,41 @@
 }
 
 replacementCharacters = {
-    0x0:"\uFFFD",
-    0x0d:"\u000D",
-    0x80:"\u20AC",
-    0x81:"\u0081",
-    0x81:"\u0081",
-    0x82:"\u201A",
-    0x83:"\u0192",
-    0x84:"\u201E",
-    0x85:"\u2026",
-    0x86:"\u2020",
-    0x87:"\u2021",
-    0x88:"\u02C6",
-    0x89:"\u2030",
-    0x8A:"\u0160",
-    0x8B:"\u2039",
-    0x8C:"\u0152",
-    0x8D:"\u008D",
-    0x8E:"\u017D",
-    0x8F:"\u008F",
-    0x90:"\u0090",
-    0x91:"\u2018",
-    0x92:"\u2019",
-    0x93:"\u201C",
-    0x94:"\u201D",
-    0x95:"\u2022",
-    0x96:"\u2013",
-    0x97:"\u2014",
-    0x98:"\u02DC",
-    0x99:"\u2122",
-    0x9A:"\u0161",
-    0x9B:"\u203A",
-    0x9C:"\u0153",
-    0x9D:"\u009D",
-    0x9E:"\u017E",
-    0x9F:"\u0178",
+    0x0: "\uFFFD",
+    0x0d: "\u000D",
+    0x80: "\u20AC",
+    0x81: "\u0081",
+    0x81: "\u0081",
+    0x82: "\u201A",
+    0x83: "\u0192",
+    0x84: "\u201E",
+    0x85: "\u2026",
+    0x86: "\u2020",
+    0x87: "\u2021",
+    0x88: "\u02C6",
+    0x89: "\u2030",
+    0x8A: "\u0160",
+    0x8B: "\u2039",
+    0x8C: "\u0152",
+    0x8D: "\u008D",
+    0x8E: "\u017D",
+    0x8F: "\u008F",
+    0x90: "\u0090",
+    0x91: "\u2018",
+    0x92: "\u2019",
+    0x93: "\u201C",
+    0x94: "\u201D",
+    0x95: "\u2022",
+    0x96: "\u2013",
+    0x97: "\u2014",
+    0x98: "\u02DC",
+    0x99: "\u2122",
+    0x9A: "\u0161",
+    0x9B: "\u203A",
+    0x9C: "\u0153",
+    0x9D: "\u009D",
+    0x9E: "\u017E",
+    0x9F: "\u0178",
 }
 
 encodings = {
@@ -3059,25 +3060,27 @@
     'x-x-big5': 'big5'}
 
 tokenTypes = {
-    "Doctype":0,
-    "Characters":1,
-    "SpaceCharacters":2,
-    "StartTag":3,
-    "EndTag":4,
-    "EmptyTag":5,
-    "Comment":6,
-    "ParseError":7
+    "Doctype": 0,
+    "Characters": 1,
+    "SpaceCharacters": 2,
+    "StartTag": 3,
+    "EndTag": 4,
+    "EmptyTag": 5,
+    "Comment": 6,
+    "ParseError": 7
 }
 
 tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],
                            tokenTypes["EmptyTag"]))
 
 
-prefixes = dict([(v,k) for k,v in namespaces.items()])
+prefixes = dict([(v, k) for k, v in namespaces.items()])
 prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
 
+
 class DataLossWarning(UserWarning):
     pass
 
+
 class ReparseException(Exception):
     pass
diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py
index 65a3e902..ca33b70b 100644
--- a/html5lib/filters/inject_meta_charset.py
+++ b/html5lib/filters/inject_meta_charset.py
@@ -2,6 +2,7 @@
 
 from . import _base
 
+
 class Filter(_base.Filter):
     def __init__(self, source, encoding):
         _base.Filter.__init__(self, source)
@@ -20,21 +21,21 @@ def __iter__(self):
 
             elif type == "EmptyTag":
                 if token["name"].lower() == "meta":
-                   # replace charset with actual encoding
-                   has_http_equiv_content_type = False
-                   for (namespace,name),value in token["data"].items():
-                       if namespace != None:
-                           continue
-                       elif name.lower() == 'charset':
-                          token["data"][(namespace,name)] = self.encoding
-                          meta_found = True
-                          break
-                       elif name == 'http-equiv' and value.lower() == 'content-type':
-                           has_http_equiv_content_type = True
-                   else:
-                       if has_http_equiv_content_type and (None, "content") in token["data"]:
-                           token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
-                           meta_found = True
+                    # replace charset with actual encoding
+                    has_http_equiv_content_type = False
+                    for (namespace, name), value in token["data"].items():
+                        if namespace is not None:
+                            continue
+                        elif name.lower() == 'charset':
+                            token["data"][(namespace, name)] = self.encoding
+                            meta_found = True
+                            break
+                        elif name == 'http-equiv' and value.lower() == 'content-type':
+                            has_http_equiv_content_type = True
+                    else:
+                        if has_http_equiv_content_type and (None, "content") in token["data"]:
+                            token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding
+                            meta_found = True
 
                 elif token["name"].lower() == "head" and not meta_found:
                     # insert meta into empty head
diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py
index bf98708d..d6f37cf4 100644
--- a/html5lib/filters/lint.py
+++ b/html5lib/filters/lint.py
@@ -9,7 +9,10 @@
 from html5lib.constants import spaceCharacters
 spaceCharacters = "".join(spaceCharacters)
 
-class LintError(Exception): pass
+
+class LintError(Exception):
+    pass
+
 
 class Filter(_base.Filter):
     def __iter__(self):
diff --git a/html5lib/filters/optionaltags.py b/html5lib/filters/optionaltags.py
index 39d93ea5..fefe0b30 100644
--- a/html5lib/filters/optionaltags.py
+++ b/html5lib/filters/optionaltags.py
@@ -2,6 +2,7 @@
 
 from . import _base
 
+
 class Filter(_base.Filter):
     def slider(self):
         previous1 = previous2 = None
@@ -17,7 +18,7 @@ def __iter__(self):
             type = token["type"]
             if type == "StartTag":
                 if (token["data"] or
-                    not self.is_optional_start(token["name"], previous, next)):
+                        not self.is_optional_start(token["name"], previous, next)):
                     yield token
             elif type == "EndTag":
                 if not self.is_optional_end(token["name"], next):
@@ -75,7 +76,7 @@ def is_optional_start(self, tagname, previous, next):
                 # omit the thead and tfoot elements' end tag when they are
                 # immediately followed by a tbody element. See is_optional_end.
                 if previous and previous['type'] == 'EndTag' and \
-                  previous['name'] in ('tbody','thead','tfoot'):
+                        previous['name'] in ('tbody', 'thead', 'tfoot'):
                     return False
                 return next["name"] == 'tr'
             else:
diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py
index adaee595..2692023d 100644
--- a/html5lib/filters/sanitizer.py
+++ b/html5lib/filters/sanitizer.py
@@ -3,8 +3,10 @@
 from . import _base
 from html5lib.sanitizer import HTMLSanitizerMixin
 
+
 class Filter(_base.Filter, HTMLSanitizerMixin):
     def __iter__(self):
         for token in _base.Filter.__iter__(self):
             token = self.sanitize_token(token)
-            if token: yield token
+            if token:
+                yield token
diff --git a/html5lib/filters/whitespace.py b/html5lib/filters/whitespace.py
index c2b7fb12..1f309236 100644
--- a/html5lib/filters/whitespace.py
+++ b/html5lib/filters/whitespace.py
@@ -8,6 +8,7 @@
 
 SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
 
+
 class Filter(_base.Filter):
 
     spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
@@ -17,7 +18,7 @@ def __iter__(self):
         for token in _base.Filter.__iter__(self):
             type = token["type"]
             if type == "StartTag" \
-              and (preserve or token["name"] in self.spacePreserveElements):
+                    and (preserve or token["name"] in self.spacePreserveElements):
                 preserve += 1
 
             elif type == "EndTag" and preserve:
@@ -32,6 +33,6 @@ def __iter__(self):
 
             yield token
 
+
 def collapse_spaces(text):
     return SPACES_REGEX.sub(' ', text)
-
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 9d319a5c..dab175dd 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -19,6 +19,7 @@
 from .constants import tokenTypes, ReparseException, namespaces
 from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
 
+
 def parse(doc, treebuilder="simpletree", encoding=None,
           namespaceHTMLElements=True):
     """Parse a string or file-like object into a tree"""
@@ -26,30 +27,33 @@ def parse(doc, treebuilder="simpletree", encoding=None,
     p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
     return p.parse(doc, encoding=encoding)
 
+
 def parseFragment(doc, container="div", treebuilder="simpletree", encoding=None,
                   namespaceHTMLElements=True):
     tb = treebuilders.getTreeBuilder(treebuilder)
     p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
     return p.parseFragment(doc, container=container, encoding=encoding)
 
+
 def method_decorator_metaclass(function):
     class Decorated(type):
         def __new__(meta, classname, bases, classDict):
             for attributeName, attribute in classDict.items():
-                if type(attribute) == types.FunctionType:
+                if isinstance(attribute, types.FunctionType):
                     attribute = function(attribute)
 
                 classDict[attributeName] = attribute
-            return  type.__new__(meta, classname, bases, classDict)
+            return type.__new__(meta, classname, bases, classDict)
     return Decorated
 
+
 class HTMLParser(object):
     """HTML parser. Generates a tree structure from a stream of (possibly
         malformed) HTML"""
 
-    def __init__(self, tree = simpletree.TreeBuilder,
-                 tokenizer = tokenizer.HTMLTokenizer, strict = False,
-                 namespaceHTMLElements = True, debug=False):
+    def __init__(self, tree=simpletree.TreeBuilder,
+                 tokenizer=tokenizer.HTMLTokenizer, strict=False,
+                 namespaceHTMLElements=True, debug=False):
         """
         strict - raise an exception when a parse error is encountered
 
@@ -94,7 +98,7 @@ def reset(self):
         self.tree.reset()
         self.firstStartTag = False
         self.errors = []
-        self.log = [] #only used with debug mode
+        self.log = []  # only used with debug mode
         # "quirks" / "limited quirks" / "no quirks"
         self.compatMode = "no quirks"
 
@@ -126,7 +130,7 @@ def reset(self):
 
     def isHTMLIntegrationPoint(self, element):
         if (element.name == "annotation-xml" and
-            element.namespace == namespaces["mathml"]):
+                element.namespace == namespaces["mathml"]):
             return ("encoding" in element.attributes and
                     element.attributes["encoding"].translate(
                         asciiUpper2Lower) in
@@ -177,7 +181,7 @@ def mainLoop(self):
                     if type == CharactersToken:
                         new_token = phase.processCharacters(new_token)
                     elif type == SpaceCharactersToken:
-                         new_token= phase.processSpaceCharacters(new_token)
+                        new_token = phase.processSpaceCharacters(new_token)
                     elif type == StartTagToken:
                         new_token = phase.processStartTag(new_token)
                     elif type == EndTagToken:
@@ -188,10 +192,9 @@ def mainLoop(self):
                         new_token = phase.processDoctype(new_token)
 
             if (type == StartTagToken and token["selfClosing"]
-                and not token["selfClosingAcknowledged"]):
+                    and not token["selfClosingAcknowledged"]):
                 self.parseError("non-void-element-with-trailing-solidus",
-                                {"name":token["name"]})
-
+                                {"name": token["name"]})
 
         # When the loop finishes it's EOF
         reprocess = True
@@ -252,77 +255,77 @@ def normalizeToken(self, token):
         return token
 
     def adjustMathMLAttributes(self, token):
-        replacements = {"definitionurl":"definitionURL"}
-        for k,v in replacements.items():
+        replacements = {"definitionurl": "definitionURL"}
+        for k, v in replacements.items():
             if k in token["data"]:
                 token["data"][v] = token["data"][k]
                 del token["data"][k]
 
     def adjustSVGAttributes(self, token):
         replacements = {
-            "attributename":"attributeName",
-            "attributetype":"attributeType",
-            "basefrequency":"baseFrequency",
-            "baseprofile":"baseProfile",
-            "calcmode":"calcMode",
-            "clippathunits":"clipPathUnits",
-            "contentscripttype":"contentScriptType",
-            "contentstyletype":"contentStyleType",
-            "diffuseconstant":"diffuseConstant",
-            "edgemode":"edgeMode",
-            "externalresourcesrequired":"externalResourcesRequired",
-            "filterres":"filterRes",
-            "filterunits":"filterUnits",
-            "glyphref":"glyphRef",
-            "gradienttransform":"gradientTransform",
-            "gradientunits":"gradientUnits",
-            "kernelmatrix":"kernelMatrix",
-            "kernelunitlength":"kernelUnitLength",
-            "keypoints":"keyPoints",
-            "keysplines":"keySplines",
-            "keytimes":"keyTimes",
-            "lengthadjust":"lengthAdjust",
-            "limitingconeangle":"limitingConeAngle",
-            "markerheight":"markerHeight",
-            "markerunits":"markerUnits",
-            "markerwidth":"markerWidth",
-            "maskcontentunits":"maskContentUnits",
-            "maskunits":"maskUnits",
-            "numoctaves":"numOctaves",
-            "pathlength":"pathLength",
-            "patterncontentunits":"patternContentUnits",
-            "patterntransform":"patternTransform",
-            "patternunits":"patternUnits",
-            "pointsatx":"pointsAtX",
-            "pointsaty":"pointsAtY",
-            "pointsatz":"pointsAtZ",
-            "preservealpha":"preserveAlpha",
-            "preserveaspectratio":"preserveAspectRatio",
-            "primitiveunits":"primitiveUnits",
-            "refx":"refX",
-            "refy":"refY",
-            "repeatcount":"repeatCount",
-            "repeatdur":"repeatDur",
-            "requiredextensions":"requiredExtensions",
-            "requiredfeatures":"requiredFeatures",
-            "specularconstant":"specularConstant",
-            "specularexponent":"specularExponent",
-            "spreadmethod":"spreadMethod",
-            "startoffset":"startOffset",
-            "stddeviation":"stdDeviation",
-            "stitchtiles":"stitchTiles",
-            "surfacescale":"surfaceScale",
-            "systemlanguage":"systemLanguage",
-            "tablevalues":"tableValues",
-            "targetx":"targetX",
-            "targety":"targetY",
-            "textlength":"textLength",
-            "viewbox":"viewBox",
-            "viewtarget":"viewTarget",
-            "xchannelselector":"xChannelSelector",
-            "ychannelselector":"yChannelSelector",
-            "zoomandpan":"zoomAndPan"
-            }
+            "attributename": "attributeName",
+            "attributetype": "attributeType",
+            "basefrequency": "baseFrequency",
+            "baseprofile": "baseProfile",
+            "calcmode": "calcMode",
+            "clippathunits": "clipPathUnits",
+            "contentscripttype": "contentScriptType",
+            "contentstyletype": "contentStyleType",
+            "diffuseconstant": "diffuseConstant",
+            "edgemode": "edgeMode",
+            "externalresourcesrequired": "externalResourcesRequired",
+            "filterres": "filterRes",
+            "filterunits": "filterUnits",
+            "glyphref": "glyphRef",
+            "gradienttransform": "gradientTransform",
+            "gradientunits": "gradientUnits",
+            "kernelmatrix": "kernelMatrix",
+            "kernelunitlength": "kernelUnitLength",
+            "keypoints": "keyPoints",
+            "keysplines": "keySplines",
+            "keytimes": "keyTimes",
+            "lengthadjust": "lengthAdjust",
+            "limitingconeangle": "limitingConeAngle",
+            "markerheight": "markerHeight",
+            "markerunits": "markerUnits",
+            "markerwidth": "markerWidth",
+            "maskcontentunits": "maskContentUnits",
+            "maskunits": "maskUnits",
+            "numoctaves": "numOctaves",
+            "pathlength": "pathLength",
+            "patterncontentunits": "patternContentUnits",
+            "patterntransform": "patternTransform",
+            "patternunits": "patternUnits",
+            "pointsatx": "pointsAtX",
+            "pointsaty": "pointsAtY",
+            "pointsatz": "pointsAtZ",
+            "preservealpha": "preserveAlpha",
+            "preserveaspectratio": "preserveAspectRatio",
+            "primitiveunits": "primitiveUnits",
+            "refx": "refX",
+            "refy": "refY",
+            "repeatcount": "repeatCount",
+            "repeatdur": "repeatDur",
+            "requiredextensions": "requiredExtensions",
+            "requiredfeatures": "requiredFeatures",
+            "specularconstant": "specularConstant",
+            "specularexponent": "specularExponent",
+            "spreadmethod": "spreadMethod",
+            "startoffset": "startOffset",
+            "stddeviation": "stdDeviation",
+            "stitchtiles": "stitchTiles",
+            "surfacescale": "surfaceScale",
+            "systemlanguage": "systemLanguage",
+            "tablevalues": "tableValues",
+            "targetx": "targetX",
+            "targety": "targetY",
+            "textlength": "textLength",
+            "viewbox": "viewBox",
+            "viewtarget": "viewTarget",
+            "xchannelselector": "xChannelSelector",
+            "ychannelselector": "yChannelSelector",
+            "zoomandpan": "zoomAndPan"
+        }
         for originalName in list(token["data"].keys()):
             if originalName in replacements:
                 svgName = replacements[originalName]
@@ -331,19 +334,19 @@ def adjustSVGAttributes(self, token):
 
     def adjustForeignAttributes(self, token):
         replacements = {
-            "xlink:actuate":("xlink", "actuate", namespaces["xlink"]),
-            "xlink:arcrole":("xlink", "arcrole", namespaces["xlink"]),
-            "xlink:href":("xlink", "href", namespaces["xlink"]),
-            "xlink:role":("xlink", "role", namespaces["xlink"]),
-            "xlink:show":("xlink", "show", namespaces["xlink"]),
-            "xlink:title":("xlink", "title", namespaces["xlink"]),
-            "xlink:type":("xlink", "type", namespaces["xlink"]),
-            "xml:base":("xml", "base", namespaces["xml"]),
-            "xml:lang":("xml", "lang", namespaces["xml"]),
-            "xml:space":("xml", "space", namespaces["xml"]),
-            "xmlns":(None, "xmlns", namespaces["xmlns"]),
-            "xmlns:xlink":("xmlns", "xlink", namespaces["xmlns"])
-            }
+            "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
+            "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
+            "xlink:href": ("xlink", "href", namespaces["xlink"]),
+            "xlink:role": ("xlink", "role", namespaces["xlink"]),
+            "xlink:show": ("xlink", "show", namespaces["xlink"]),
+            "xlink:title": ("xlink", "title", namespaces["xlink"]),
+            "xlink:type": ("xlink", "type", namespaces["xlink"]),
+            "xml:base": ("xml", "base", namespaces["xml"]),
+            "xml:lang": ("xml", "lang", namespaces["xml"]),
+            "xml:space": ("xml", "space", namespaces["xml"]),
+            "xmlns": (None, "xmlns", namespaces["xmlns"]),
+            "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
+        }
 
         for originalName in token["data"].keys():
             if originalName in replacements:
@@ -359,20 +362,20 @@ def resetInsertionMode(self):
         # specification.)
         last = False
         newModes = {
-            "select":"inSelect",
-            "td":"inCell",
-            "th":"inCell",
-            "tr":"inRow",
-            "tbody":"inTableBody",
-            "thead":"inTableBody",
-            "tfoot":"inTableBody",
-            "caption":"inCaption",
-            "colgroup":"inColumnGroup",
-            "table":"inTable",
-            "head":"inBody",
-            "body":"inBody",
-            "frameset":"inFrameset",
-            "html":"beforeHead"
+            "select": "inSelect",
+            "td": "inCell",
+            "th": "inCell",
+            "tr": "inRow",
+            "tbody": "inTableBody",
+            "thead": "inTableBody",
+            "tfoot": "inTableBody",
+            "caption": "inCaption",
+            "colgroup": "inColumnGroup",
+            "table": "inTable",
+            "head": "inBody",
+            "body": "inBody",
+            "frameset": "inFrameset",
+            "html": "beforeHead"
         }
         for node in self.tree.openElements[::-1]:
             nodeName = node.name
@@ -415,16 +418,18 @@ def parseRCDataRawtext(self, token, contentType):
 
         self.phase = self.phases["text"]
 
+
 def getPhases(debug):
     def log(function):
         """Logger that records which phase processes each token"""
         type_names = dict((value, key) for key, value in
                           constants.tokenTypes.items())
+
         def wrapped(self, *args, **kwargs):
             if function.__name__.startswith("process") and len(args) > 0:
                 token = args[0]
                 try:
-                    info = {"type":type_names[token['type']]}
+                    info = {"type": type_names[token['type']]}
                 except:
                     raise
                 if token['type'] in constants.tagTokenTypes:
@@ -475,8 +480,8 @@ def processStartTag(self, token):
             return self.startTagHandler[token["name"]](token)
 
         def startTagHtml(self, token):
-            if self.parser.firstStartTag == False and token["name"] == "html":
-               self.parser.parseError("non-html-root")
+            if not self.parser.firstStartTag and token["name"] == "html":
+                self.parser.parseError("non-html-root")
             # XXX Need a check here to see if the first start tag token emitted is
             # this token... If it's not, invoke self.parser.parseError().
             for attr, value in token["data"].items():
@@ -500,8 +505,8 @@ def processDoctype(self, token):
             systemId = token["systemId"]
             correct = token["correct"]
 
-            if (name != "html" or publicId != None or
-                systemId != None and systemId != "about:legacy-compat"):
+            if (name != "html" or publicId is not None or
+                    systemId is not None and systemId != "about:legacy-compat"):
                 self.parser.parseError("unknown-doctype")
 
             if publicId is None:
@@ -576,8 +581,8 @@ def processDoctype(self, token):
                 or publicId.startswith(
                     ("-//w3c//dtd html 4.01 frameset//",
                      "-//w3c//dtd html 4.01 transitional//")) and
-                    systemId == None
-                or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
+                    systemId is None
+                    or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
                 self.parser.compatMode = "quirks"
             elif (publicId.startswith(
                     ("-//w3c//dtd xhtml 1.0 frameset//",
@@ -585,7 +590,7 @@ def processDoctype(self, token):
                   or publicId.startswith(
                       ("-//w3c//dtd html 4.01 frameset//",
                        "-//w3c//dtd html 4.01 transitional//")) and
-                      systemId != None):
+                  systemId is not None):
                 self.parser.compatMode = "limited quirks"
 
             self.parser.phase = self.parser.phases["beforeHtml"]
@@ -601,13 +606,13 @@ def processCharacters(self, token):
 
         def processStartTag(self, token):
             self.parser.parseError("expected-doctype-but-got-start-tag",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.anythingElse()
             return token
 
         def processEndTag(self, token):
             self.parser.parseError("expected-doctype-but-got-end-tag",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.anythingElse()
             return token
 
@@ -616,7 +621,6 @@ def processEOF(self):
             self.anythingElse()
             return True
 
-
     class BeforeHtmlPhase(Phase):
         # helper methods
         def insertHtmlElement(self):
@@ -647,12 +651,11 @@ def processStartTag(self, token):
         def processEndTag(self, token):
             if token["name"] not in ("head", "body", "html", "br"):
                 self.parser.parseError("unexpected-end-tag-before-html",
-                  {"name": token["name"]})
+                                       {"name": token["name"]})
             else:
                 self.insertHtmlElement()
                 return token
 
-
     class BeforeHeadPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
@@ -697,13 +700,13 @@ def endTagImplyHead(self, token):
 
         def endTagOther(self, token):
             self.parser.parseError("end-tag-after-implied-root",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
 
     class InHeadPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-            self.startTagHandler =  utils.MethodDispatcher([
+            self.startTagHandler = utils.MethodDispatcher([
                 ("html", self.startTagHtml),
                 ("title", self.startTagTitle),
                 (("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle),
@@ -722,7 +725,7 @@ def __init__(self, parser, tree):
             self.endTagHandler.default = self.endTagOther
 
         # the real thing
-        def processEOF (self):
+        def processEOF(self):
             self.anythingElse()
             return True
 
@@ -766,7 +769,7 @@ def startTagTitle(self, token):
             self.parser.parseRCDataRawtext(token, "RCDATA")
 
         def startTagNoScriptNoFramesStyle(self, token):
-            #Need to decide whether to implement the scripting-disabled case
+            # Need to decide whether to implement the scripting-disabled case
             self.parser.parseRCDataRawtext(token, "RAWTEXT")
 
         def startTagScript(self, token):
@@ -781,7 +784,7 @@ def startTagOther(self, token):
 
         def endTagHead(self, token):
             node = self.parser.tree.openElements.pop()
-            assert node.name == "head", "Expected head got %s"%node.name
+            assert node.name == "head", "Expected head got %s" % node.name
             self.parser.phase = self.parser.phases["afterHead"]
 
         def endTagHtmlBodyBr(self, token):
@@ -794,12 +797,10 @@ def endTagOther(self, token):
         def anythingElse(self):
             self.endTagHead(impliedTagToken("head"))
 
-
     # XXX If we implement a parser for which scripting is disabled we need to
     # implement this phase.
     #
     # class InHeadNoScriptPhase(Phase):
-
     class AfterHeadPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
@@ -810,7 +811,7 @@ def __init__(self, parser, tree):
                 ("frameset", self.startTagFrameset),
                 (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
                   "style", "title"),
-                  self.startTagFromHead),
+                 self.startTagFromHead),
                 ("head", self.startTagHead)
             ])
             self.startTagHandler.default = self.startTagOther
@@ -840,7 +841,7 @@ def startTagFrameset(self, token):
 
         def startTagFromHead(self, token):
             self.parser.parseError("unexpected-start-tag-out-of-my-head",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.tree.openElements.append(self.tree.headPointer)
             self.parser.phases["inHead"].processStartTag(token)
             for node in self.tree.openElements[::-1]:
@@ -849,7 +850,7 @@ def startTagFromHead(self, token):
                     break
 
         def startTagHead(self, token):
-            self.parser.parseError("unexpected-start-tag", {"name":token["name"]})
+            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
 
         def startTagOther(self, token):
             self.anythingElse()
@@ -860,21 +861,20 @@ def endTagHtmlBodyBr(self, token):
             return token
 
         def endTagOther(self, token):
-            self.parser.parseError("unexpected-end-tag", {"name":token["name"]})
+            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
 
         def anythingElse(self):
             self.tree.insertElement(impliedTagToken("body", "StartTag"))
             self.parser.phase = self.parser.phases["inBody"]
             self.parser.framesetOK = True
 
-
     class InBodyPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
         # the really-really-really-very crazy mode
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
-            #Keep a ref to this for special handling of whitespace in <pre>
+            # Keep a ref to this for special handling of whitespace in <pre>
             self.processSpaceCharactersNonPre = self.processSpaceCharacters
 
             self.startTagHandler = utils.MethodDispatcher([
@@ -888,15 +888,15 @@ def __init__(self, parser, tree):
                   "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
                   "footer", "header", "hgroup", "menu", "nav", "ol", "p",
                   "section", "summary", "ul"),
-                  self.startTagCloseP),
+                 self.startTagCloseP),
                 (headingElements, self.startTagHeading),
                 (("pre", "listing"), self.startTagPreListing),
                 ("form", self.startTagForm),
                 (("li", "dd", "dt"), self.startTagListItem),
-                ("plaintext",self.startTagPlaintext),
+                ("plaintext", self.startTagPlaintext),
                 ("a", self.startTagA),
                 (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
-                  "strong", "tt", "u"),self.startTagFormatting),
+                  "strong", "tt", "u"), self.startTagFormatting),
                 ("nobr", self.startTagNobr),
                 ("button", self.startTagButton),
                 (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
@@ -924,21 +924,21 @@ def __init__(self, parser, tree):
             self.startTagHandler.default = self.startTagOther
 
             self.endTagHandler = utils.MethodDispatcher([
-                ("body",self.endTagBody),
-                ("html",self.endTagHtml),
+                ("body", self.endTagBody),
+                ("html", self.endTagHtml),
                 (("address", "article", "aside", "blockquote", "center",
                   "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
                   "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre",
                   "section", "summary", "ul"), self.endTagBlock),
                 ("form", self.endTagForm),
-                ("p",self.endTagP),
+                ("p", self.endTagP),
                 (("dd", "dt", "li"), self.endTagListItem),
                 (headingElements, self.endTagHeading),
                 (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
                   "strike", "strong", "tt", "u"), self.endTagFormatting),
-                (("applet",  "marquee", "object"), self.endTagAppletMarqueeObject),
+                (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
                 ("br", self.endTagBr),
-                ])
+            ])
             self.endTagHandler.default = self.endTagOther
 
         def isMatchingFormattingElement(self, node1, node2):
@@ -980,7 +980,7 @@ def processEOF(self):
                 if node.name not in allowed_elements:
                     self.parser.parseError("expected-closing-tag-but-got-eof")
                     break
-            #Stop parsing
+            # Stop parsing
 
         def processSpaceCharactersDropNewline(self, token):
             # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
@@ -989,7 +989,7 @@ def processSpaceCharactersDropNewline(self, token):
             self.processSpaceCharacters = self.processSpaceCharactersNonPre
             if (data.startswith("\n") and
                 self.tree.openElements[-1].name in ("pre", "listing", "textarea")
-                and not self.tree.openElements[-1].hasContent()):
+                    and not self.tree.openElements[-1].hasContent()):
                 data = data[1:]
             if data:
                 self.tree.reconstructActiveFormattingElements()
@@ -997,11 +997,11 @@ def processSpaceCharactersDropNewline(self, token):
 
         def processCharacters(self, token):
             if token["data"] == "\u0000":
-                #The tokenizer should always emit null on its own
+                # The tokenizer should always emit null on its own
                 return
             self.tree.reconstructActiveFormattingElements()
             self.tree.insertText(token["data"])
-            #This must be bad for performance
+            # This must be bad for performance
             if (self.parser.framesetOK and
                 any([char not in spaceCharacters
                      for char in token["data"]])):
@@ -1017,7 +1017,7 @@ def startTagProcessInHead(self, token):
         def startTagBody(self, token):
             self.parser.parseError("unexpected-start-tag", {"name": "body"})
             if (len(self.tree.openElements) == 1
-                or self.tree.openElements[1].name != "body"):
+                    or self.tree.openElements[1].name != "body"):
                 assert self.parser.innerHTML
             else:
                 self.parser.framesetOK = False
@@ -1063,9 +1063,9 @@ def startTagForm(self, token):
         def startTagListItem(self, token):
             self.parser.framesetOK = False
 
-            stopNamesMap = {"li":["li"],
-                            "dt":["dt", "dd"],
-                            "dd":["dt", "dd"]}
+            stopNamesMap = {"li": ["li"],
+                            "dt": ["dt", "dd"],
+                            "dd": ["dt", "dd"]}
             stopNames = stopNamesMap[token["name"]]
             for node in reversed(self.tree.openElements):
                 if node.name in stopNames:
@@ -1073,7 +1073,7 @@ def startTagListItem(self, token):
                         impliedTagToken(node.name, "EndTag"))
                     break
                 if (node.nameTuple in specialElements and
-                    node.name not in ("address", "div", "p")):
+                        node.name not in ("address", "div", "p")):
                     break
 
             if self.tree.elementInScope("p", variant="button"):
@@ -1100,7 +1100,7 @@ def startTagA(self, token):
             afeAElement = self.tree.elementInActiveFormattingElements("a")
             if afeAElement:
                 self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                  {"startName": "a", "endName": "a"})
+                                       {"startName": "a", "endName": "a"})
                 self.endTagFormatting(impliedTagToken("a"))
                 if afeAElement in self.tree.openElements:
                     self.tree.openElements.remove(afeAElement)
@@ -1117,7 +1117,7 @@ def startTagNobr(self, token):
             self.tree.reconstructActiveFormattingElements()
             if self.tree.elementInScope("nobr"):
                 self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                  {"startName": "nobr", "endName": "nobr"})
+                                       {"startName": "nobr", "endName": "nobr"})
                 self.processEndTag(impliedTagToken("nobr"))
                 # XXX Need tests that trigger the following
                 self.tree.reconstructActiveFormattingElements()
@@ -1126,7 +1126,7 @@ def startTagNobr(self, token):
         def startTagButton(self, token):
             if self.tree.elementInScope("button"):
                 self.parser.parseError("unexpected-start-tag-implies-end-tag",
-                  {"startName": "button", "endName": "button"})
+                                       {"startName": "button", "endName": "button"})
                 self.processEndTag(impliedTagToken("button"))
                 return token
             else:
@@ -1166,8 +1166,8 @@ def startTagInput(self, token):
             framesetOK = self.parser.framesetOK
             self.startTagVoidFormatting(token)
             if ("type" in token["data"] and
-                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
-                #input type=hidden doesn't change framesetOK
+                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+                # input type=hidden doesn't change framesetOK
                 self.parser.framesetOK = framesetOK
 
         def startTagParamSource(self, token):
@@ -1186,7 +1186,7 @@ def startTagHr(self, token):
         def startTagImage(self, token):
             # No really...
             self.parser.parseError("unexpected-start-tag-treated-as",
-              {"originalName": "image", "newName": "img"})
+                                   {"originalName": "image", "newName": "img"})
             self.processStartTag(impliedTagToken("img", "StartTag",
                                                  attributes=token["data"],
                                                  selfClosing=token["selfClosing"]))
@@ -1208,7 +1208,7 @@ def startTagIsIndex(self, token):
             else:
                 prompt = "This is a searchable index. Enter search keywords: "
             self.processCharacters(
-                {"type":tokenTypes["Characters"], "data":prompt})
+                {"type": tokenTypes["Characters"], "data": prompt})
             attributes = token["data"].copy()
             if "action" in attributes:
                 del attributes["action"]
@@ -1216,8 +1216,8 @@ def startTagIsIndex(self, token):
                 del attributes["prompt"]
             attributes["name"] = "isindex"
             self.processStartTag(impliedTagToken("input", "StartTag",
-                                                 attributes = attributes,
-                                                 selfClosing =
+                                                 attributes=attributes,
+                                                 selfClosing=
                                                  token["selfClosing"]))
             self.processEndTag(impliedTagToken("label"))
             self.processStartTag(impliedTagToken("hr", "StartTag"))
@@ -1270,8 +1270,8 @@ def startTagMath(self, token):
             self.parser.adjustForeignAttributes(token)
             token["namespace"] = namespaces["mathml"]
             self.tree.insertElement(token)
-            #Need to get the parse error right for the case where the token
-            #has a namespace not equal to the xmlns attribute
+            # Need to get the parse error right for the case where the token
+            # has a namespace not equal to the xmlns attribute
             if token["selfClosing"]:
                 self.tree.openElements.pop()
                 token["selfClosingAcknowledged"] = True
@@ -1282,8 +1282,8 @@ def startTagSvg(self, token):
             self.parser.adjustForeignAttributes(token)
             token["namespace"] = namespaces["svg"]
             self.tree.insertElement(token)
-            #Need to get the parse error right for the case where the token
-            #has a namespace not equal to the xmlns attribute
+            # Need to get the parse error right for the case where the token
+            # has a namespace not equal to the xmlns attribute
             if token["selfClosing"]:
                 self.tree.openElements.pop()
                 token["selfClosingAcknowledged"] = True
@@ -1325,7 +1325,7 @@ def endTagBody(self, token):
                                                    "tbody", "td", "tfoot",
                                                    "th", "thead", "tr", "body",
                                                    "html")):
-                        #Not sure this is the correct name for the parse error
+                        # Not sure this is the correct name for the parse error
                         self.parser.parseError(
                             "expected-one-end-tag-but-got-another",
                             {"expectedName": "body", "gotName": node.name})
@@ -1333,20 +1333,20 @@ def endTagBody(self, token):
             self.parser.phase = self.parser.phases["afterBody"]
 
         def endTagHtml(self, token):
-            #We repeat the test for the body end tag token being ignored here
+            # We repeat the test for the body end tag token being ignored here
             if self.tree.elementInScope("body"):
                 self.endTagBody(impliedTagToken("body"))
                 return token
 
         def endTagBlock(self, token):
-            #Put us back in the right whitespace handling mode
+            # Put us back in the right whitespace handling mode
             if token["name"] == "pre":
                 self.processSpaceCharacters = self.processSpaceCharactersNonPre
             inScope = self.tree.elementInScope(token["name"])
             if inScope:
                 self.tree.generateImpliedEndTags()
             if self.tree.openElements[-1].name != token["name"]:
-                 self.parser.parseError("end-tag-too-early", {"name": token["name"]})
+                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
             if inScope:
                 node = self.tree.openElements.pop()
                 while node.name != token["name"]:
@@ -1357,7 +1357,7 @@ def endTagForm(self, token):
             self.tree.formPointer = None
             if node is None or not self.tree.elementInScope(node):
                 self.parser.parseError("unexpected-end-tag",
-                                       {"name":"form"})
+                                       {"name": "form"})
             else:
                 self.tree.generateImpliedEndTags()
                 if self.tree.openElements[-1] != node:
@@ -1373,7 +1373,7 @@ def endTagListItem(self, token):
             if not self.tree.elementInScope(token["name"], variant=variant):
                 self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
             else:
-                self.tree.generateImpliedEndTags(exclude = token["name"])
+                self.tree.generateImpliedEndTags(exclude=token["name"])
                 if self.tree.openElements[-1].name != token["name"]:
                     self.parser.parseError(
                         "end-tag-too-early",
@@ -1439,7 +1439,6 @@ def endTagFormatting(self, token):
                     self.tree.activeFormattingElements.remove(formattingElement)
                     return
 
-
                 # Otherwise, if there is such a node, and that node is
                 # also in the stack of open elements, but the element
                 # is not in scope, then this is a parse error; ignore
@@ -1472,10 +1471,10 @@ def endTagFormatting(self, token):
                         element = self.tree.openElements.pop()
                     self.tree.activeFormattingElements.remove(element)
                     return
-                commonAncestor = self.tree.openElements[afeIndex-1]
+                commonAncestor = self.tree.openElements[afeIndex - 1]
 
                 # Step 5
-                #if furthestBlock.parent:
+                # if furthestBlock.parent:
                 #    furthestBlock.parent.removeChild(furthestBlock)
 
                 # Step 5
@@ -1506,7 +1505,7 @@ def endTagFormatting(self, token):
                         bookmark = (self.tree.activeFormattingElements.index(node)
                                     + 1)
                     # Step 6.5
-                    #cite = node.parent
+                    # cite = node.parent
                     clone = node.cloneNode()
                     # Replace node with clone
                     self.tree.activeFormattingElements[
@@ -1553,7 +1552,7 @@ def endTagFormatting(self, token):
                 # Step 12
                 self.tree.openElements.remove(formattingElement)
                 self.tree.openElements.insert(
-                  self.tree.openElements.index(furthestBlock) + 1, clone)
+                    self.tree.openElements.index(furthestBlock) + 1, clone)
 
         def endTagAppletMarqueeObject(self, token):
             if self.tree.elementInScope(token["name"]):
@@ -1569,7 +1568,7 @@ def endTagAppletMarqueeObject(self, token):
 
         def endTagBr(self, token):
             self.parser.parseError("unexpected-end-tag-treated-as",
-              {"originalName": "br", "newName": "br element"})
+                                   {"originalName": "br", "newName": "br element"})
             self.tree.reconstructActiveFormattingElements()
             self.tree.insertElement(impliedTagToken("br", "StartTag"))
             self.tree.openElements.pop()
@@ -1594,7 +1593,7 @@ def __init__(self, parser, tree):
             self.startTagHandler = utils.MethodDispatcher([])
             self.startTagHandler.default = self.startTagOther
             self.endTagHandler = utils.MethodDispatcher([
-                    ("script", self.endTagScript)])
+                ("script", self.endTagScript)])
             self.endTagHandler.default = self.endTagOther
 
         def processCharacters(self, token):
@@ -1602,20 +1601,20 @@ def processCharacters(self, token):
 
         def processEOF(self):
             self.parser.parseError("expected-named-closing-tag-but-got-eof",
-                                   {"name":self.tree.openElements[-1].name})
+                                   {"name": self.tree.openElements[-1].name})
             self.tree.openElements.pop()
             self.parser.phase = self.parser.originalPhase
             return True
 
         def startTagOther(self, token):
-            assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode"%token['name']
+            assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']
 
         def endTagScript(self, token):
             node = self.tree.openElements.pop()
             assert node.name == "script"
             self.parser.phase = self.parser.originalPhase
-            #The rest of this method is all stuff that only happens if
-            #document.write works
+            # The rest of this method is all stuff that only happens if
+            # document.write works
 
         def endTagOther(self, token):
             self.tree.openElements.pop()
@@ -1650,7 +1649,7 @@ def __init__(self, parser, tree):
         def clearStackToTableContext(self):
             # "clear the stack back to a table context"
             while self.tree.openElements[-1].name not in ("table", "html"):
-                #self.parser.parseError("unexpected-implied-end-tag-in-table",
+                # self.parser.parseError("unexpected-implied-end-tag-in-table",
                 #  {"name":  self.tree.openElements[-1].name})
                 self.tree.openElements.pop()
             # When the current node is <html> it's an innerHTML case
@@ -1661,7 +1660,7 @@ def processEOF(self):
                 self.parser.parseError("eof-in-table")
             else:
                 assert self.parser.innerHTML
-            #Stop parsing
+            # Stop parsing
 
         def processSpaceCharacters(self, token):
             originalPhase = self.parser.phase
@@ -1676,7 +1675,7 @@ def processCharacters(self, token):
             self.parser.phase.processCharacters(token)
 
         def insertText(self, token):
-            #If we get here there must be at least one non-whitespace character
+            # If we get here there must be at least one non-whitespace character
             # Do the table magic!
             self.tree.insertFromTable = True
             self.parser.phases["inBody"].processCharacters(token)
@@ -1708,7 +1707,7 @@ def startTagImplyTbody(self, token):
 
         def startTagTable(self, token):
             self.parser.parseError("unexpected-start-tag-implies-end-tag",
-              {"startName": "table", "endName": "table"})
+                                   {"startName": "table", "endName": "table"})
             self.parser.phase.processEndTag(impliedTagToken("table"))
             if not self.parser.innerHTML:
                 return token
@@ -1718,7 +1717,7 @@ def startTagStyleScript(self, token):
 
         def startTagInput(self, token):
             if ("type" in token["data"] and
-                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
                 self.parser.parseError("unexpected-hidden-input-in-table")
                 self.tree.insertElement(token)
                 # XXX associate with form
@@ -1745,8 +1744,8 @@ def endTagTable(self, token):
                 self.tree.generateImpliedEndTags()
                 if self.tree.openElements[-1].name != "table":
                     self.parser.parseError("end-tag-too-early-named",
-                      {"gotName": "table",
-                       "expectedName": self.tree.openElements[-1].name})
+                                           {"gotName": "table",
+                                            "expectedName": self.tree.openElements[-1].name})
                 while self.tree.openElements[-1].name != "table":
                     self.tree.openElements.pop()
                 self.tree.openElements.pop()
@@ -1775,7 +1774,7 @@ def __init__(self, parser, tree):
         def flushCharacters(self):
             data = "".join([item["data"] for item in self.characterTokens])
             if any([item not in spaceCharacters for item in data]):
-                token = {"type":tokenTypes["Characters"], "data":data}
+                token = {"type": tokenTypes["Characters"], "data": data}
                 self.parser.phases["inTable"].insertText(token)
             elif data:
                 self.tree.insertText(data)
@@ -1797,7 +1796,7 @@ def processCharacters(self, token):
             self.characterTokens.append(token)
 
         def processSpaceCharacters(self, token):
-            #pretty sure we should never reach here
+            # pretty sure we should never reach here
             self.characterTokens.append(token)
     #        assert False
 
@@ -1811,7 +1810,6 @@ def processEndTag(self, token):
             self.parser.phase = self.originalPhase
             return token
 
-
     class InCaptionPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
         def __init__(self, parser, tree):
@@ -1843,7 +1841,7 @@ def processCharacters(self, token):
 
         def startTagTableElement(self, token):
             self.parser.parseError()
-            #XXX Have to duplicate logic here to find out if the tag is ignored
+            # XXX Have to duplicate logic here to find out if the tag is ignored
             ignoreEndTag = self.ignoreEndTagCaption()
             self.parser.phase.processEndTag(impliedTagToken("caption"))
             if not ignoreEndTag:
@@ -1858,8 +1856,8 @@ def endTagCaption(self, token):
                 self.tree.generateImpliedEndTags()
                 if self.tree.openElements[-1].name != "caption":
                     self.parser.parseError("expected-one-end-tag-but-got-another",
-                      {"gotName": "caption",
-                       "expectedName": self.tree.openElements[-1].name})
+                                           {"gotName": "caption",
+                                            "expectedName": self.tree.openElements[-1].name})
                 while self.tree.openElements[-1].name != "caption":
                     self.tree.openElements.pop()
                 self.tree.openElements.pop()
@@ -1883,7 +1881,6 @@ def endTagIgnore(self, token):
         def endTagOther(self, token):
             return self.parser.phases["inBody"].processEndTag(token)
 
-
     class InColumnGroupPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-column
 
@@ -1949,7 +1946,6 @@ def endTagOther(self, token):
             if not ignoreEndTag:
                 return token
 
-
     class InTableBodyPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
         def __init__(self, parser, tree):
@@ -1974,8 +1970,8 @@ def __init__(self, parser, tree):
         # helper methods
         def clearStackToTableBodyContext(self):
             while self.tree.openElements[-1].name not in ("tbody", "tfoot",
-              "thead", "html"):
-                #self.parser.parseError("unexpected-implied-end-tag-in-table",
+                                                          "thead", "html"):
+                # self.parser.parseError("unexpected-implied-end-tag-in-table",
                 #  {"name": self.tree.openElements[-1].name})
                 self.tree.openElements.pop()
             if self.tree.openElements[-1].name == "html":
@@ -2006,7 +2002,7 @@ def startTagTableOther(self, token):
             # XXX AT Any ideas on how to share this with endTagTable?
             if (self.tree.elementInScope("tbody", variant="table") or
                 self.tree.elementInScope("thead", variant="table") or
-                self.tree.elementInScope("tfoot", variant="table")):
+                    self.tree.elementInScope("tfoot", variant="table")):
                 self.clearStackToTableBodyContext()
                 self.endTagTableRowGroup(
                     impliedTagToken(self.tree.openElements[-1].name))
@@ -2026,12 +2022,12 @@ def endTagTableRowGroup(self, token):
                 self.parser.phase = self.parser.phases["inTable"]
             else:
                 self.parser.parseError("unexpected-end-tag-in-table-body",
-                  {"name": token["name"]})
+                                       {"name": token["name"]})
 
         def endTagTable(self, token):
             if (self.tree.elementInScope("tbody", variant="table") or
                 self.tree.elementInScope("thead", variant="table") or
-                self.tree.elementInScope("tfoot", variant="table")):
+                    self.tree.elementInScope("tfoot", variant="table")):
                 self.clearStackToTableBodyContext()
                 self.endTagTableRowGroup(
                     impliedTagToken(self.tree.openElements[-1].name))
@@ -2043,12 +2039,11 @@ def endTagTable(self, token):
 
         def endTagIgnore(self, token):
             self.parser.parseError("unexpected-end-tag-in-table-body",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
 
         def endTagOther(self, token):
             return self.parser.phases["inTable"].processEndTag(token)
 
-
     class InRowPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#in-row
         def __init__(self, parser, tree):
@@ -2066,7 +2061,7 @@ def __init__(self, parser, tree):
                 ("table", self.endTagTable),
                 (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
                 (("body", "caption", "col", "colgroup", "html", "td", "th"),
-                  self.endTagIgnore)
+                 self.endTagIgnore)
             ])
             self.endTagHandler.default = self.endTagOther
 
@@ -2074,7 +2069,7 @@ def __init__(self, parser, tree):
         def clearStackToTableRowContext(self):
             while self.tree.openElements[-1].name not in ("tr", "html"):
                 self.parser.parseError("unexpected-implied-end-tag-in-table-row",
-                  {"name": self.tree.openElements[-1].name})
+                                       {"name": self.tree.openElements[-1].name})
                 self.tree.openElements.pop()
 
         def ignoreEndTagTr(self):
@@ -2133,7 +2128,7 @@ def endTagTableRowGroup(self, token):
 
         def endTagIgnore(self, token):
             self.parser.parseError("unexpected-end-tag-in-table-row",
-                {"name": token["name"]})
+                                   {"name": token["name"]})
 
         def endTagOther(self, token):
             return self.parser.phases["inTable"].processEndTag(token)
@@ -2172,7 +2167,7 @@ def processCharacters(self, token):
 
         def startTagTableOther(self, token):
             if (self.tree.elementInScope("td", variant="table") or
-                self.tree.elementInScope("th", variant="table")):
+                    self.tree.elementInScope("th", variant="table")):
                 self.closeCell()
                 return token
             else:
@@ -2188,7 +2183,7 @@ def endTagTableCell(self, token):
                 self.tree.generateImpliedEndTags(token["name"])
                 if self.tree.openElements[-1].name != token["name"]:
                     self.parser.parseError("unexpected-cell-end-tag",
-                      {"name": token["name"]})
+                                           {"name": token["name"]})
                     while True:
                         node = self.tree.openElements.pop()
                         if node.name == token["name"]:
@@ -2277,19 +2272,19 @@ def startTagScript(self, token):
 
         def startTagOther(self, token):
             self.parser.parseError("unexpected-start-tag-in-select",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
 
         def endTagOption(self, token):
             if self.tree.openElements[-1].name == "option":
                 self.tree.openElements.pop()
             else:
                 self.parser.parseError("unexpected-end-tag-in-select",
-                  {"name": "option"})
+                                       {"name": "option"})
 
         def endTagOptgroup(self, token):
             # </optgroup> implicitly closes <option>
             if (self.tree.openElements[-1].name == "option" and
-                self.tree.openElements[-2].name == "optgroup"):
+                    self.tree.openElements[-2].name == "optgroup"):
                 self.tree.openElements.pop()
             # It also closes </optgroup>
             if self.tree.openElements[-1].name == "optgroup":
@@ -2297,7 +2292,7 @@ def endTagOptgroup(self, token):
             # But nothing else
             else:
                 self.parser.parseError("unexpected-end-tag-in-select",
-                  {"name": "optgroup"})
+                                       {"name": "optgroup"})
 
         def endTagSelect(self, token):
             if self.tree.elementInScope("select", variant="select"):
@@ -2312,8 +2307,7 @@ def endTagSelect(self, token):
 
         def endTagOther(self, token):
             self.parser.parseError("unexpected-end-tag-in-select",
-              {"name": token["name"]})
-
+                                   {"name": token["name"]})
 
     class InSelectInTablePhase(Phase):
         def __init__(self, parser, tree):
@@ -2354,56 +2348,56 @@ def endTagTable(self, token):
         def endTagOther(self, token):
             return self.parser.phases["inSelect"].processEndTag(token)
 
-
     class InForeignContentPhase(Phase):
         breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                       "center", "code", "dd", "div", "dl", "dt",
                                       "em", "embed", "h1", "h2", "h3",
                                       "h4", "h5", "h6", "head", "hr", "i", "img",
                                       "li", "listing", "menu", "meta", "nobr",
-                                      "ol", "p", "pre", "ruby", "s",  "small",
-                                      "span", "strong", "strike",  "sub", "sup",
+                                      "ol", "p", "pre", "ruby", "s", "small",
+                                      "span", "strong", "strike", "sub", "sup",
                                       "table", "tt", "u", "ul", "var"])
+
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
         def adjustSVGTagNames(self, token):
-            replacements = {"altglyph":"altGlyph",
-                            "altglyphdef":"altGlyphDef",
-                            "altglyphitem":"altGlyphItem",
-                            "animatecolor":"animateColor",
-                            "animatemotion":"animateMotion",
-                            "animatetransform":"animateTransform",
-                            "clippath":"clipPath",
-                            "feblend":"feBlend",
-                            "fecolormatrix":"feColorMatrix",
-                            "fecomponenttransfer":"feComponentTransfer",
-                            "fecomposite":"feComposite",
-                            "feconvolvematrix":"feConvolveMatrix",
-                            "fediffuselighting":"feDiffuseLighting",
-                            "fedisplacementmap":"feDisplacementMap",
-                            "fedistantlight":"feDistantLight",
-                            "feflood":"feFlood",
-                            "fefunca":"feFuncA",
-                            "fefuncb":"feFuncB",
-                            "fefuncg":"feFuncG",
-                            "fefuncr":"feFuncR",
-                            "fegaussianblur":"feGaussianBlur",
-                            "feimage":"feImage",
-                            "femerge":"feMerge",
-                            "femergenode":"feMergeNode",
-                            "femorphology":"feMorphology",
-                            "feoffset":"feOffset",
-                            "fepointlight":"fePointLight",
-                            "fespecularlighting":"feSpecularLighting",
-                            "fespotlight":"feSpotLight",
-                            "fetile":"feTile",
-                            "feturbulence":"feTurbulence",
-                            "foreignobject":"foreignObject",
-                            "glyphref":"glyphRef",
-                            "lineargradient":"linearGradient",
-                            "radialgradient":"radialGradient",
-                            "textpath":"textPath"}
+            replacements = {"altglyph": "altGlyph",
+                            "altglyphdef": "altGlyphDef",
+                            "altglyphitem": "altGlyphItem",
+                            "animatecolor": "animateColor",
+                            "animatemotion": "animateMotion",
+                            "animatetransform": "animateTransform",
+                            "clippath": "clipPath",
+                            "feblend": "feBlend",
+                            "fecolormatrix": "feColorMatrix",
+                            "fecomponenttransfer": "feComponentTransfer",
+                            "fecomposite": "feComposite",
+                            "feconvolvematrix": "feConvolveMatrix",
+                            "fediffuselighting": "feDiffuseLighting",
+                            "fedisplacementmap": "feDisplacementMap",
+                            "fedistantlight": "feDistantLight",
+                            "feflood": "feFlood",
+                            "fefunca": "feFuncA",
+                            "fefuncb": "feFuncB",
+                            "fefuncg": "feFuncG",
+                            "fefuncr": "feFuncR",
+                            "fegaussianblur": "feGaussianBlur",
+                            "feimage": "feImage",
+                            "femerge": "feMerge",
+                            "femergenode": "feMergeNode",
+                            "femorphology": "feMorphology",
+                            "feoffset": "feOffset",
+                            "fepointlight": "fePointLight",
+                            "fespecularlighting": "feSpecularLighting",
+                            "fespotlight": "feSpotLight",
+                            "fetile": "feTile",
+                            "feturbulence": "feTurbulence",
+                            "foreignobject": "foreignObject",
+                            "glyphref": "glyphRef",
+                            "lineargradient": "linearGradient",
+                            "radialgradient": "radialGradient",
+                            "textpath": "textPath"}
 
             if token["name"] in replacements:
                 token["name"] = replacements[token["name"]]
@@ -2451,7 +2445,7 @@ def processEndTag(self, token):
 
             while True:
                 if node.name.translate(asciiUpper2Lower) == token["name"]:
-                    #XXX this isn't in the spec but it seems necessary
+                    # XXX this isn't in the spec but it seems necessary
                     if self.parser.phase == self.parser.phases["inTableText"]:
                         self.parser.phase.flushCharacters()
                         self.parser.phase = self.parser.phase.originalPhase
@@ -2469,21 +2463,20 @@ def processEndTag(self, token):
                     break
             return new_token
 
-
     class AfterBodyPhase(Phase):
         def __init__(self, parser, tree):
             Phase.__init__(self, parser, tree)
 
             self.startTagHandler = utils.MethodDispatcher([
-                    ("html", self.startTagHtml)
-                    ])
+                ("html", self.startTagHtml)
+            ])
             self.startTagHandler.default = self.startTagOther
 
             self.endTagHandler = utils.MethodDispatcher([("html", self.endTagHtml)])
             self.endTagHandler.default = self.endTagOther
 
         def processEOF(self):
-            #Stop parsing
+            # Stop parsing
             pass
 
         def processComment(self, token):
@@ -2501,11 +2494,11 @@ def startTagHtml(self, token):
 
         def startTagOther(self, token):
             self.parser.parseError("unexpected-start-tag-after-body",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.parser.phase = self.parser.phases["inBody"]
             return token
 
-        def endTagHtml(self,name):
+        def endTagHtml(self, name):
             if self.parser.innerHTML:
                 self.parser.parseError("unexpected-end-tag-after-body-innerhtml")
             else:
@@ -2513,7 +2506,7 @@ def endTagHtml(self,name):
 
         def endTagOther(self, token):
             self.parser.parseError("unexpected-end-tag-after-body",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.parser.phase = self.parser.phases["inBody"]
             return token
 
@@ -2556,7 +2549,7 @@ def startTagNoframes(self, token):
 
         def startTagOther(self, token):
             self.parser.parseError("unexpected-start-tag-in-frameset",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
 
         def endTagFrameset(self, token):
             if self.tree.openElements[-1].name == "html":
@@ -2565,15 +2558,14 @@ def endTagFrameset(self, token):
             else:
                 self.tree.openElements.pop()
             if (not self.parser.innerHTML and
-                self.tree.openElements[-1].name != "frameset"):
+                    self.tree.openElements[-1].name != "frameset"):
                 # If we're not in innerHTML mode and the the current node is not a
                 # "frameset" element (anymore) then switch.
                 self.parser.phase = self.parser.phases["afterFrameset"]
 
         def endTagOther(self, token):
             self.parser.parseError("unexpected-end-tag-in-frameset",
-              {"name": token["name"]})
-
+                                   {"name": token["name"]})
 
     class AfterFramesetPhase(Phase):
         # http://www.whatwg.org/specs/web-apps/current-work/#after3
@@ -2592,7 +2584,7 @@ def __init__(self, parser, tree):
             self.endTagHandler.default = self.endTagOther
 
         def processEOF(self):
-            #Stop parsing
+            # Stop parsing
             pass
 
         def processCharacters(self, token):
@@ -2603,15 +2595,14 @@ def startTagNoframes(self, token):
 
         def startTagOther(self, token):
             self.parser.parseError("unexpected-start-tag-after-frameset",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
 
         def endTagHtml(self, token):
             self.parser.phase = self.parser.phases["afterAfterFrameset"]
 
         def endTagOther(self, token):
             self.parser.parseError("unexpected-end-tag-after-frameset",
-              {"name": token["name"]})
-
+                                   {"name": token["name"]})
 
     class AfterAfterBodyPhase(Phase):
         def __init__(self, parser, tree):
@@ -2641,13 +2632,13 @@ def startTagHtml(self, token):
 
         def startTagOther(self, token):
             self.parser.parseError("expected-eof-but-got-start-tag",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.parser.phase = self.parser.phases["inBody"]
             return token
 
         def processEndTag(self, token):
             self.parser.parseError("expected-eof-but-got-end-tag",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
             self.parser.phase = self.parser.phases["inBody"]
             return token
 
@@ -2681,12 +2672,11 @@ def startTagNoFrames(self, token):
 
         def startTagOther(self, token):
             self.parser.parseError("expected-eof-but-got-start-tag",
-              {"name": token["name"]})
+                                   {"name": token["name"]})
 
         def processEndTag(self, token):
             self.parser.parseError("expected-eof-but-got-end-tag",
-              {"name": token["name"]})
-
+                                   {"name": token["name"]})
 
     return {
         "initial": InitialPhase,
@@ -2713,14 +2703,16 @@ def processEndTag(self, token):
         "afterAfterBody": AfterAfterBodyPhase,
         "afterAfterFrameset": AfterAfterFramesetPhase,
         # XXX after after frameset
-        }
+    }
+
 
-def impliedTagToken(name, type="EndTag", attributes = None,
-                    selfClosing = False):
+def impliedTagToken(name, type="EndTag", attributes=None,
+                    selfClosing=False):
     if attributes is None:
         attributes = {}
-    return {"type":tokenTypes[type], "name":name, "data":attributes,
-            "selfClosing":selfClosing}
+    return {"type": tokenTypes[type], "name": name, "data": attributes,
+            "selfClosing": selfClosing}
+
 
 class ParseError(Exception):
     """Error in parsed document"""
diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
index f6f30fcf..c9ad69c5 100644
--- a/html5lib/ihatexml.py
+++ b/html5lib/ihatexml.py
@@ -5,26 +5,101 @@
 
 from .constants import DataLossWarning
 
-baseChar = """[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | 
[#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
+baseChar = """
+[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
+[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] |
+[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] |
+[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 |
+[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] |
+[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] |
+[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] |
+[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] |
+[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 |
+[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] |
+[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] |
+[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D |
+[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] |
+[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] |
+[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] |
+[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] |
+[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] |
+[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] |
+[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 |
+[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] |
+[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] |
+[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] |
+[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] |
+[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] |
+[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] |
+[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] |
+[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] |
+[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] |
+[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] |
+[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A |
+#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 |
+#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] |
+#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] |
+[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] |
+[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C |
+#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 |
+[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] |
+[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] |
+[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 |
+[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] |
+[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B |
+#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE |
+[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] |
+[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 |
+[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] |
+[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
 
 ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
 
-combiningCharacter = """[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309A"""
-
-digit = """[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
-
-extender = """#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
+combiningCharacter = """
+[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] |
+[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 |
+[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] |
+[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] |
+#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] |
+[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] |
+[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 |
+#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] |
+[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC |
+[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] |
+#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] |
+[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] |
+[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] |
+[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] |
+[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] |
+[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] |
+#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 |
+[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] |
+#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] |
+[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] |
+[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] |
+#x3099 | #x309A"""
+
+digit = """
+[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
+[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
+[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
+[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
+
+extender = """
+#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
+[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
 
 letter = " | ".join([baseChar, ideographic])
 
-#Without the
+# Without the
 name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
-                       extender])
+                   extender])
 nameFirst = " | ".join([letter, "_"])
 
 reChar = re.compile(r"#x([\d|A-F]{4,4})")
 reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
 
+
 def charStringToList(chars):
     charRanges = [item.strip() for item in chars.split(" | ")]
     rv = []
@@ -35,7 +110,7 @@ def charStringToList(chars):
             if match is not None:
                 rv.append([hexToInt(item) for item in match.groups()])
                 if len(rv[-1]) == 1:
-                    rv[-1] = rv[-1]*2
+                    rv[-1] = rv[-1] * 2
                 foundMatch = True
                 break
         if not foundMatch:
@@ -45,6 +120,7 @@ def charStringToList(chars):
     rv = normaliseCharList(rv)
     return rv
 
+
 def normaliseCharList(charList):
     charList = sorted(charList)
     for item in charList:
@@ -54,41 +130,45 @@ def normaliseCharList(charList):
     while i < len(charList):
         j = 1
         rv.append(charList[i])
-        while i + j < len(charList) and charList[i+j][0] <= rv[-1][1] + 1:
-            rv[-1][1] = charList[i+j][1]
+        while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1:
+            rv[-1][1] = charList[i + j][1]
             j += 1
         i += j
     return rv
 
-#We don't really support characters above the BMP :(
+# We don't really support characters above the BMP :(
 max_unicode = int("FFFF", 16)
 
+
 def missingRanges(charList):
     rv = []
     if charList[0] != 0:
         rv.append([0, charList[0][0] - 1])
     for i, item in enumerate(charList[:-1]):
-        rv.append([item[1]+1, charList[i+1][0] - 1])
+        rv.append([item[1] + 1, charList[i + 1][0] - 1])
     if charList[-1][1] != max_unicode:
         rv.append([charList[-1][1] + 1, max_unicode])
     return rv
 
+
 def listToRegexpStr(charList):
     rv = []
     for item in charList:
         if item[0] == item[1]:
-           rv.append(escapeRegexp(chr(item[0])))
+            rv.append(escapeRegexp(chr(item[0])))
         else:
             rv.append(escapeRegexp(chr(item[0])) + "-" +
                       escapeRegexp(chr(item[1])))
-    return "[%s]"%"".join(rv)
+    return "[%s]" % "".join(rv)
+
 
 def hexToInt(hex_str):
     return int(hex_str, 16)
 
+
 def escapeRegexp(string):
     specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
-                          "[", "]", "|", "(", ")", "-")
+                         "[", "]", "|", "(", ")", "-")
     for char in specialCharacters:
         string = string.replace(char, "\\" + char)
         if char in string:
@@ -96,19 +176,21 @@ def escapeRegexp(string):
 
     return string
 
-#output from the above
+# output from the above
 nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u
0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
 
 nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u
3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
 
+
 class InfosetFilter(object):
     replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
-    def __init__(self, replaceChars = None,
-                 dropXmlnsLocalName = False,
-                 dropXmlnsAttrNs = False,
-                 preventDoubleDashComments = False,
-                 preventDashAtCommentEnd = False,
-                 replaceFormFeedCharacters = True):
+
+    def __init__(self, replaceChars=None,
+                 dropXmlnsLocalName=False,
+                 dropXmlnsAttrNs=False,
+                 preventDoubleDashComments=False,
+                 preventDashAtCommentEnd=False,
+                 replaceFormFeedCharacters=True):
 
         self.dropXmlnsLocalName = dropXmlnsLocalName
         self.dropXmlnsAttrNs = dropXmlnsAttrNs
@@ -146,7 +228,7 @@ def coerceCharacters(self, data):
             for i in range(data.count("\x0C")):
                 warnings.warn("Text cannot contain U+000C", DataLossWarning)
             data = data.replace("\x0C", " ")
-        #Other non-xml characters
+        # Other non-xml characters
         return data
 
     def toXmlName(self, name):
diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py
index 65875b85..159901be 100644
--- a/html5lib/inputstream.py
+++ b/html5lib/inputstream.py
@@ -22,7 +22,7 @@
     class BufferedIOBase(object):
         pass
 
-#Non-unicode versions of constants for use in the pre-parser
+# Non-unicode versions of constants for use in the pre-parser
 spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
 asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
 asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
@@ -43,6 +43,7 @@ class BufferedIOBase(object):
 # Cache for charsUntil()
 charsUntilRegEx = {}
 
+
 class BufferedStream:
     """Buffering for streams that do not have buffering of their own
 
@@ -53,7 +54,7 @@ class BufferedStream:
     def __init__(self, stream):
         self.stream = stream
         self.buffer = []
-        self.position = [-1,0] #chunk number, offset
+        self.position = [-1, 0]  # chunk number, offset
 
     def tell(self):
         pos = 0
@@ -159,7 +160,7 @@ def __init__(self, source):
 
         """
 
-        #Craziness
+        # Craziness
         if len("\U0010FFFF") == 1:
             self.reportCharacterErrors = self.characterErrorsUCS4
             self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]")
@@ -186,7 +187,7 @@ def reset(self):
         # number of columns in the last line of the previous chunk
         self.prevNumCols = 0
 
-        #Deal with CR LF and surrogates split over chunk boundaries
+        # Deal with CR LF and surrogates split over chunk boundaries
         self._bufferedCharacter = None
 
     def openStream(self, source):
@@ -201,10 +202,10 @@ def openStream(self, source):
         else:
             stream = StringIO(source)
 
-        if (#not isinstance(stream, BufferedIOBase) and
+        if (  # not isinstance(stream, BufferedIOBase) and
             not(hasattr(stream, "tell") and
                 hasattr(stream, "seek")) or
-            stream is sys.stdin):
+                stream is sys.stdin):
             stream = BufferedStream(stream)
 
         return stream
@@ -223,7 +224,7 @@ def _position(self, offset):
     def position(self):
         """Returns (line, col) of the current position in the stream."""
         line, col = self._position(self.chunkOffset)
-        return (line+1, col)
+        return (line + 1, col)
 
     def char(self):
         """ Read one character from the stream or queue if available. Return
@@ -252,7 +253,7 @@ def readChunk(self, chunkSize=None):
 
         data = self.dataStream.read(chunkSize)
 
-        #Deal with CR LF and surrogates broken across chunks
+        # Deal with CR LF and surrogates broken across chunks
         if self._bufferedCharacter:
             data = self._bufferedCharacter + data
             self._bufferedCharacter = None
@@ -285,18 +286,18 @@ def characterErrorsUCS4(self, data):
             self.errors.append("invalid-codepoint")
 
     def characterErrorsUCS2(self, data):
-        #Someone picked the wrong compile option
-        #You lose
+        # Someone picked the wrong compile option
+        # You lose
         skip = False
         for match in invalid_unicode_re.finditer(data):
             if skip:
                 continue
             codepoint = ord(match.group())
             pos = match.start()
-            #Pretty sure there should be endianness issues here
-            if utils.isSurrogatePair(data[pos:pos+2]):
-                #We have a surrogate pair!
-                char_val = utils.surrogatePairToCodepoint(data[pos:pos+2])
+            # Pretty sure there should be endianness issues here
+            if utils.isSurrogatePair(data[pos:pos + 2]):
+                # We have a surrogate pair!
+                char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
                 if char_val in non_bmp_invalid_codepoints:
                     self.errors.append("invalid-codepoint")
                 skip = True
@@ -307,7 +308,7 @@ def characterErrorsUCS2(self, data):
                 skip = False
                 self.errors.append("invalid-codepoint")
 
-    def charsUntil(self, characters, opposite = False):
+    def charsUntil(self, characters, opposite=False):
         """ Returns a string of characters from the stream up to but not
         including any character in 'characters' or EOF. 'characters' must be
         a container that supports the 'in' method and iteration over its
@@ -370,6 +371,7 @@ def unget(self, char):
                 self.chunkOffset -= 1
                 assert self.chunk[self.chunkOffset] == char
 
+
 class HTMLBinaryInputStream(HTMLUnicodeInputStream):
     """Provides a unicode stream of characters to the HTMLTokenizer.
 
@@ -403,19 +405,19 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
         self.charEncoding = (codecName(encoding), "certain")
 
         # Encoding Information
-        #Number of bytes to use when looking for a meta element with
-        #encoding information
+        # Number of bytes to use when looking for a meta element with
+        # encoding information
         self.numBytesMeta = 512
-        #Number of bytes to use when using detecting encoding using chardet
+        # Number of bytes to use when detecting encoding using chardet
         self.numBytesChardet = 100
-        #Encoding to use if no other information can be found
+        # Encoding to use if no other information can be found
         self.defaultEncoding = "windows-1252"
 
-        #Detect encoding iff no explicit "transport level" encoding is supplied
+        # Detect encoding iff no explicit "transport level" encoding is supplied
         if (self.charEncoding[0] is None):
             self.charEncoding = self.detectEncoding(parseMeta, chardet)
 
-        #Call superclass
+        # Call superclass
         self.reset()
 
     def reset(self):
@@ -436,22 +438,22 @@ def openStream(self, source):
             stream = BytesIO(source)
 
         if (not(hasattr(stream, "tell") and hasattr(stream, "seek")) or
-            stream is sys.stdin):
+                stream is sys.stdin):
             stream = BufferedStream(stream)
 
         return stream
 
     def detectEncoding(self, parseMeta=True, chardet=True):
-        #First look for a BOM
-        #This will also read past the BOM if present
+        # First look for a BOM
+        # This will also read past the BOM if present
         encoding = self.detectBOM()
         confidence = "certain"
-        #If there is no BOM need to look for meta elements with encoding
-        #information
+        # If there is no BOM need to look for meta elements with encoding
+        # information
         if encoding is None and parseMeta:
             encoding = self.detectEncodingMeta()
             confidence = "tentative"
-        #Guess with chardet, if avaliable
+        # Guess with chardet, if available
         if encoding is None and chardet:
             confidence = "tentative"
             try:
@@ -472,11 +474,11 @@ def detectEncoding(self, parseMeta=True, chardet=True):
                 pass
         # If all else fails use the default encoding
         if encoding is None:
-            confidence="tentative"
+            confidence = "tentative"
             encoding = self.defaultEncoding
 
-        #Substitute for equivalent encodings:
-        encodingSub = {"iso-8859-1":"windows-1252"}
+        # Substitute for equivalent encodings:
+        encodingSub = {"iso-8859-1": "windows-1252"}
 
         if encoding.lower() in encodingSub:
             encoding = encodingSub[encoding.lower()]
@@ -496,7 +498,7 @@ def changeEncoding(self, newEncoding):
             self.rawStream.seek(0)
             self.reset()
             self.charEncoding = (newEncoding, "certain")
-            raise ReparseException("Encoding changed from %s to %s"%(self.charEncoding[0], newEncoding))
+            raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
 
     def detectBOM(self):
         """Attempts to detect at BOM at the start of the stream. If
@@ -520,7 +522,7 @@ def detectBOM(self):
             encoding = bomDict.get(string)         # UTF-32
             seek = 4
             if not encoding:
-                encoding = bomDict.get(string[:2]) # UTF-16
+                encoding = bomDict.get(string[:2])  # UTF-16
                 seek = 2
 
         # Set the read position past the BOM if one was found, otherwise
@@ -543,6 +545,7 @@ def detectEncodingMeta(self):
 
         return encoding
 
+
 class EncodingBytes(bytes):
     """String-like object with an associated position and various extra methods
     If the position is ever greater than the string length then an exception is
@@ -552,7 +555,7 @@ def __new__(self, value):
         return bytes.__new__(self, value.lower())
 
     def __init__(self, value):
-        self._position=-1
+        self._position = -1
 
     def __iter__(self):
         return self
@@ -563,7 +566,7 @@ def __next__(self):
             raise StopIteration
         elif p < 0:
             raise TypeError
-        return self[p:p+1]
+        return self[p:p + 1]
 
     def next(self):
         # Py2 compat
@@ -576,7 +579,7 @@ def previous(self):
         elif p < 0:
             raise TypeError
         self._position = p = p - 1
-        return self[p:p+1]
+        return self[p:p + 1]
 
     def setPosition(self, position):
         if self._position >= len(self):
@@ -594,7 +597,7 @@ def getPosition(self):
     position = property(getPosition, setPosition)
 
     def getCurrentByte(self):
-        return self[self.position:self.position+1]
+        return self[self.position:self.position + 1]
 
     currentByte = property(getCurrentByte)
 
@@ -602,7 +605,7 @@ def skip(self, chars=spaceCharactersBytes):
         """Skip past a list of characters"""
         p = self.position               # use property for the error-checking
         while p < len(self):
-            c = self[p:p+1]
+            c = self[p:p + 1]
             if c not in chars:
                 self._position = p
                 return c
@@ -613,7 +616,7 @@ def skip(self, chars=spaceCharactersBytes):
     def skipUntil(self, chars):
         p = self.position
         while p < len(self):
-            c = self[p:p+1]
+            c = self[p:p + 1]
             if c in chars:
                 self._position = p
                 return c
@@ -626,7 +629,7 @@ def matchBytes(self, bytes):
         are found return True and advance the position to the byte after the
         match. Otherwise return False and leave the position alone"""
         p = self.position
-        data = self[p:p+len(bytes)]
+        data = self[p:p + len(bytes)]
         rv = data.startswith(bytes)
         if rv:
             self.position += len(bytes)
@@ -640,7 +643,7 @@ def jumpTo(self, bytes):
             # XXX: This is ugly, but I can't see a nicer way to fix this.
             if self._position == -1:
                 self._position = 0
-            self._position += (newPosition + len(bytes)-1)
+            self._position += (newPosition + len(bytes) - 1)
             return True
         else:
             raise StopIteration
@@ -656,12 +659,12 @@ def __init__(self, data):
 
     def getEncoding(self):
         methodDispatch = (
-            (b"<!--",self.handleComment),
-            (b"<meta",self.handleMeta),
-            (b"</",self.handlePossibleEndTag),
-            (b"<!",self.handleOther),
-            (b"<?",self.handleOther),
-            (b"<",self.handlePossibleStartTag))
+            (b"<!--", self.handleComment),
+            (b"<meta", self.handleMeta),
+            (b"</", self.handlePossibleEndTag),
+            (b"<!", self.handleOther),
+            (b"<?", self.handleOther),
+            (b"<", self.handlePossibleStartTag))
         for byte in self.data:
             keepParsing = True
             for key, method in methodDispatch:
@@ -670,7 +673,7 @@ def getEncoding(self):
                         keepParsing = method()
                         break
                     except StopIteration:
-                        keepParsing=False
+                        keepParsing = False
                         break
             if not keepParsing:
                 break
@@ -683,13 +686,13 @@ def handleComment(self):
 
     def handleMeta(self):
         if self.data.currentByte not in spaceCharactersBytes:
-            #if we have <meta not followed by a space so just keep going
+            # If <meta is not followed by a space, just keep going
             return True
-        #We have a valid meta element we want to search for attributes
+        # We have a valid meta element we want to search for attributes
         hasPragma = False
         pendingEncoding = None
         while True:
-            #Try to find the next attribute after the current position
+            # Try to find the next attribute after the current position
             attr = self.getAttribute()
             if attr is None:
                 return True
@@ -727,9 +730,9 @@ def handlePossibleEndTag(self):
     def handlePossibleTag(self, endTag):
         data = self.data
         if data.currentByte not in asciiLettersBytes:
-            #If the next byte is not an ascii letter either ignore this
-            #fragment (possible start tag case) or treat it according to
-            #handleOther
+            # If the next byte is not an ASCII letter, either ignore this
+            # fragment (possible start tag case) or treat it according to
+            # handleOther
             if endTag:
                 data.previous()
                 self.handleOther()
@@ -737,11 +740,11 @@ def handlePossibleTag(self, endTag):
 
         c = data.skipUntil(spacesAngleBrackets)
         if c == b"<":
-            #return to the first step in the overall "two step" algorithm
-            #reprocessing the < byte
+            # return to the first step in the overall "two step" algorithm
+            # reprocessing the < byte
             data.previous()
         else:
-            #Read all attributes
+            # Read all attributes
             attr = self.getAttribute()
             while attr is not None:
                 attr = self.getAttribute()
@@ -763,47 +766,47 @@ def getAttribute(self):
         # Step 3
         attrName = []
         attrValue = []
-        #Step 4 attribute name
+        # Step 4 attribute name
         while True:
             if c == b"=" and attrName:
                 break
             elif c in spaceCharactersBytes:
-                #Step 6!
+                # Step 6!
                 c = data.skip()
                 break
             elif c in (b"/", b">"):
                 return b"".join(attrName), b""
             elif c in asciiUppercaseBytes:
                 attrName.append(c.lower())
-            elif c == None:
+            elif c is None:
                 return None
             else:
                 attrName.append(c)
-            #Step 5
+            # Step 5
             c = next(data)
-        #Step 7
+        # Step 7
         if c != b"=":
             data.previous()
             return b"".join(attrName), b""
-        #Step 8
+        # Step 8
         next(data)
-        #Step 9
+        # Step 9
         c = data.skip()
-        #Step 10
+        # Step 10
         if c in (b"'", b'"'):
-            #10.1
+            # 10.1
             quoteChar = c
             while True:
-                #10.2
+                # 10.2
                 c = next(data)
-                #10.3
+                # 10.3
                 if c == quoteChar:
                     next(data)
                     return b"".join(attrName), b"".join(attrValue)
-                #10.4
+                # 10.4
                 elif c in asciiUppercaseBytes:
                     attrValue.append(c.lower())
-                #10.5
+                # 10.5
                 else:
                     attrValue.append(c)
         elif c == b">":
@@ -831,19 +834,20 @@ class ContentAttrParser(object):
     def __init__(self, data):
         assert isinstance(data, bytes)
         self.data = data
+
     def parse(self):
         try:
-            #Check if the attr name is charset
-            #otherwise return
+            # Check if the attr name is charset
+            # otherwise return
             self.data.jumpTo(b"charset")
             self.data.position += 1
             self.data.skip()
             if not self.data.currentByte == b"=":
-                #If there is no = sign keep looking for attrs
+                # If there is no = sign keep looking for attrs
                 return None
             self.data.position += 1
             self.data.skip()
-            #Look for an encoding between matching quote marks
+            # Look for an encoding between matching quote marks
             if self.data.currentByte in (b'"', b"'"):
                 quoteMark = self.data.currentByte
                 self.data.position += 1
@@ -853,13 +857,13 @@ def parse(self):
                 else:
                     return None
             else:
-                #Unquoted value
+                # Unquoted value
                 oldPosition = self.data.position
                 try:
                     self.data.skipUntil(spaceCharactersBytes)
                     return self.data[oldPosition:self.data.position]
                 except StopIteration:
-                    #Return the whole remaining value
+                    # Return the whole remaining value
                     return self.data[oldPosition:]
         except StopIteration:
             return None
diff --git a/html5lib/sanitizer.py b/html5lib/sanitizer.py
index 9c7d342d..2f63a471 100644
--- a/html5lib/sanitizer.py
+++ b/html5lib/sanitizer.py
@@ -6,138 +6,139 @@
 from .tokenizer import HTMLTokenizer
 from .constants import tokenTypes
 
+
 class HTMLSanitizerMixin(object):
     """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
 
     acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
-        'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
-        'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
-        'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
-        'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
-        'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
-        'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
-        'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
-        'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
-        'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
-        'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
-        'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
-        'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
+                           'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
+                           'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
+                           'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
+                           'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
+                           'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
+                           'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
+                           'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
+                           'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
+                           'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
+                           'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
+                           'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
+                           'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
 
     mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
-        'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
-        'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
-        'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
-        'munderover', 'none']
+                       'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
+                       'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
+                       'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
+                       'munderover', 'none']
 
     svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
-        'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
-        'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
-        'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
-        'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
-        'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
+                    'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
+                    'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
+                    'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
+                    'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
+                    'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
 
     acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
-        'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
-        'background', 'balance', 'bgcolor', 'bgproperties', 'border',
-        'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
-        'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
-        'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
-        'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
-        'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
-        'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
-        'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
-        'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
-        'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
-        'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
-        'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
-        'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
-        'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
-        'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
-        'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
-        'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
-        'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
-        'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
-        'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
-        'width', 'wrap', 'xml:lang']
+                             'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
+                             'background', 'balance', 'bgcolor', 'bgproperties', 'border',
+                             'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
+                             'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
+                             'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
+                             'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
+                             'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
+                             'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
+                             'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
+                             'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
+                             'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
+                             'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
+                             'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
+                             'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
+                             'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
+                             'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
+                             'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
+                             'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
+                             'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
+                             'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
+                             'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
+                             'width', 'wrap', 'xml:lang']
 
     mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
-        'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
-        'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
-        'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
-        'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
-        'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
-        'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
-        'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
-        'xlink:type', 'xmlns', 'xmlns:xlink']
+                         'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
+                         'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
+                         'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
+                         'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
+                         'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
+                         'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
+                         'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
+                         'xlink:type', 'xmlns', 'xmlns:xlink']
 
     svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
-        'arabic-form', 'ascent', 'attributeName', 'attributeType',
-        'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
-        'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
-        'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
-        'fill-opacity', 'fill-rule', 'font-family', 'font-size',
-        'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
-        'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
-        'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
-        'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
-        'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
-        'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
-        'opacity', 'orient', 'origin', 'overline-position',
-        'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
-        'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
-        'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
-        'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
-        'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
-        'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
-        'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
-        'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
-        'transform', 'type', 'u1', 'u2', 'underline-position',
-        'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
-        'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
-        'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
-        'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
-        'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
-        'y1', 'y2', 'zoomAndPan']
+                      'arabic-form', 'ascent', 'attributeName', 'attributeType',
+                      'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
+                      'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
+                      'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
+                      'fill-opacity', 'fill-rule', 'font-family', 'font-size',
+                      'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
+                      'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
+                      'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
+                      'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
+                      'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
+                      'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
+                      'opacity', 'orient', 'origin', 'overline-position',
+                      'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
+                      'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
+                      'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
+                      'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
+                      'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
+                      'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
+                      'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
+                      'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
+                      'transform', 'type', 'u1', 'u2', 'underline-position',
+                      'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
+                      'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
+                      'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
+                      'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
+                      'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
+                      'y1', 'y2', 'zoomAndPan']
 
     attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
-        'xlink:href', 'xml:base']
+                       'xlink:href', 'xml:base']
 
     svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
-        'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
-        'mask', 'stroke']
+                               'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
+                               'mask', 'stroke']
 
     svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
-        'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
-        'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
-        'set', 'use']
+                            'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
+                            'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
+                            'set', 'use']
 
     acceptable_css_properties = ['azimuth', 'background-color',
-        'border-bottom-color', 'border-collapse', 'border-color',
-        'border-left-color', 'border-right-color', 'border-top-color', 'clear',
-        'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
-        'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
-        'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
-        'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
-        'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
-        'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
-        'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
-        'white-space', 'width']
+                                 'border-bottom-color', 'border-collapse', 'border-color',
+                                 'border-left-color', 'border-right-color', 'border-top-color', 'clear',
+                                 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
+                                 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
+                                 'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
+                                 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
+                                 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
+                                 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
+                                 'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
+                                 'white-space', 'width']
 
     acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
-        'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
-        'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
-        'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
-        'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
-        'transparent', 'underline', 'white', 'yellow']
+                               'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
+                               'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
+                               'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
+                               'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
+                               'transparent', 'underline', 'white', 'yellow']
 
-    acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule',
-        'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
-        'stroke-opacity']
+    acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
+                                 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
+                                 'stroke-opacity']
 
-    acceptable_protocols = [ 'ed2k', 'ftp', 'http', 'https', 'irc',
-        'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
-        'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
-        'ssh', 'sftp', 'rtsp', 'afs' ]
+    acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
+                            'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
+                            'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
+                            'ssh', 'sftp', 'rtsp', 'afs']
 
     # subclasses may define their own versions of these constants
     allowed_elements = acceptable_elements + mathml_elements + svg_elements
@@ -163,13 +164,13 @@ def sanitize_token(self, token):
         # accommodate filters which use token_type differently
         token_type = token["type"]
         if token_type in list(tokenTypes.keys()):
-          token_type = tokenTypes[token_type]
+            token_type = tokenTypes[token_type]
 
         if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
-                             tokenTypes["EmptyTag"]):
+                          tokenTypes["EmptyTag"]):
             if token["name"] in self.allowed_elements:
                 if "data" in token:
-                    attrs = dict([(name,val) for name,val in
+                    attrs = dict([(name, val) for name, val in
                                   token["data"][::-1]
                                   if name in self.allowed_attributes])
                     for attr in self.attr_val_is_uri:
@@ -177,9 +178,9 @@ def sanitize_token(self, token):
                             continue
                         val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
                                                unescape(attrs[attr])).lower()
-                        #remove replacement characters from unescaped characters
+                        # remove replacement characters from unescaped characters
                         val_unescaped = val_unescaped.replace("\ufffd", "")
-                        if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and
+                        if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
                             (val_unescaped.split(':')[0] not in
                              self.allowed_protocols)):
                             del attrs[attr]
@@ -194,18 +195,18 @@ def sanitize_token(self, token):
                         del attrs['xlink:href']
                     if 'style' in attrs:
                         attrs['style'] = self.sanitize_css(attrs['style'])
-                    token["data"] = [[name,val] for name,val in list(attrs.items())]
+                    token["data"] = [[name, val] for name, val in list(attrs.items())]
                 return token
             else:
                 if token_type == tokenTypes["EndTag"]:
                     token["data"] = "</%s>" % token["name"]
                 elif token["data"]:
-                    attrs = ''.join([' %s="%s"' % (k,escape(v)) for k,v in token["data"]])
-                    token["data"] = "<%s%s>" % (token["name"],attrs)
+                    attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]])
+                    token["data"] = "<%s%s>" % (token["name"], attrs)
                 else:
                     token["data"] = "<%s>" % token["name"]
                 if token.get("selfClosing"):
-                    token["data"]=token["data"][:-1] + "/>"
+                    token["data"] = token["data"][:-1] + "/>"
 
                 if token["type"] in list(tokenTypes.keys()):
                     token["type"] = "Characters"
@@ -221,35 +222,39 @@ def sanitize_token(self, token):
 
     def sanitize_css(self, style):
         # disallow urls
-        style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style)
+        style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
 
         # gauntlet
-        if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return ''
-        if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): return ''
+        if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
+            return ''
+        if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
+            return ''
 
         clean = []
-        for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style):
-          if not value: continue
-          if prop.lower() in self.allowed_css_properties:
-              clean.append(prop + ': ' + value + ';')
-          elif prop.split('-')[0].lower() in ['background','border','margin',
-                                              'padding']:
-              for keyword in value.split():
-                  if not keyword in self.acceptable_css_keywords and \
-                      not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$",keyword):
-                      break
-              else:
-                  clean.append(prop + ': ' + value + ';')
-          elif prop.lower() in self.allowed_svg_properties:
-              clean.append(prop + ': ' + value + ';')
+        for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
+            if not value:
+                continue
+            if prop.lower() in self.allowed_css_properties:
+                clean.append(prop + ': ' + value + ';')
+            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
+                                                'padding']:
+                for keyword in value.split():
+                    if not keyword in self.acceptable_css_keywords and \
+                            not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
+                        break
+                else:
+                    clean.append(prop + ': ' + value + ';')
+            elif prop.lower() in self.allowed_svg_properties:
+                clean.append(prop + ': ' + value + ';')
 
         return ' '.join(clean)
 
+
 class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
     def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
                  lowercaseElementName=False, lowercaseAttrName=False, parser=None):
-        #Change case matching defaults as we only output lowercase html anyway
-        #This solution doesn't seem ideal...
+        # Change case matching defaults as we only output lowercase html anyway
+        # This solution doesn't seem ideal...
         HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
                                lowercaseElementName, lowercaseAttrName, parser=parser)
 
diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer/htmlserializer.py
index ac6a4e41..c5ec773b 100644
--- a/html5lib/serializer/htmlserializer.py
+++ b/html5lib/serializer/htmlserializer.py
@@ -6,7 +6,7 @@
 
 try:
     from functools import reduce
-    pass # no-op statement to avoid 3to2 introducing parse error
+    pass  # no-op statement to avoid 3to2 introducing parse error
 except ImportError:
     pass
 
@@ -27,9 +27,9 @@
     encode_entity_map = {}
     is_ucs4 = len("\U0010FFFF") == 1
     for k, v in list(entities.items()):
-        #skip multi-character entities
+        # skip multi-character entities
         if ((is_ucs4 and len(v) > 1) or
-            (not is_ucs4 and len(v) > 2)):
+                (not is_ucs4 and len(v) > 2)):
             continue
         if v != "&":
             if len(v) == 2:
@@ -54,8 +54,8 @@ def htmlentityreplace_errors(exc):
                     skip = False
                     continue
                 index = i + exc.start
-                if utils.isSurrogatePair(exc.object[index:min([exc.end, index+2])]):
-                    codepoint = utils.surrogatePairToCodepoint(exc.object[index:index+2])
+                if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
+                    codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
                     skip = True
                 else:
                     codepoint = ord(c)
@@ -68,7 +68,7 @@ def htmlentityreplace_errors(exc):
                     if not e.endswith(";"):
                         res.append(";")
                 else:
-                    res.append("&#x%s;"%(hex(cp)[2:]))
+                    res.append("&#x%s;" % (hex(cp)[2:]))
             return ("".join(res), exc.end)
         else:
             return xmlcharrefreplace_errors(exc)
@@ -102,10 +102,10 @@ class HTMLSerializer(object):
     sanitize = False
 
     options = ("quote_attr_values", "quote_char", "use_best_quote_char",
-          "minimize_boolean_attributes", "use_trailing_solidus",
-          "space_before_trailing_solidus", "omit_optional_tags",
-          "strip_whitespace", "inject_meta_charset", "escape_lt_in_attrs",
-          "escape_rcdata", "resolve_entities", "sanitize")
+               "minimize_boolean_attributes", "use_trailing_solidus",
+               "space_before_trailing_solidus", "omit_optional_tags",
+               "strip_whitespace", "inject_meta_charset", "escape_lt_in_attrs",
+               "escape_rcdata", "resolve_entities", "sanitize")
 
     def __init__(self, **kwargs):
         """Initialize HTMLSerializer.
@@ -226,24 +226,25 @@ def serialize(self, treewalker, encoding=None):
                     in_cdata = True
                 elif in_cdata:
                     self.serializeError(_("Unexpected child element of a CDATA element"))
-                for (attr_namespace,attr_name),attr_value in sorted(token["data"].items()):
-                    #TODO: Add namespace support here
+                for (attr_namespace, attr_name), attr_value in sorted(token["data"].items()):
+                    # TODO: Add namespace support here
                     k = attr_name
                     v = attr_value
                     yield self.encodeStrict(' ')
 
                     yield self.encodeStrict(k)
                     if not self.minimize_boolean_attributes or \
-                      (k not in booleanAttributes.get(name, tuple()) \
-                      and k not in booleanAttributes.get("", tuple())):
+                        (k not in booleanAttributes.get(name, tuple())
+                         and k not in booleanAttributes.get("", tuple())):
                         yield self.encodeStrict("=")
                         if self.quote_attr_values or not v:
                             quote_attr = True
                         else:
-                            quote_attr = reduce(lambda x,y: x or (y in v),
-                                spaceCharacters + ">\"'=", False)
+                            quote_attr = reduce(lambda x, y: x or (y in v),
+                                                spaceCharacters + ">\"'=", False)
                         v = v.replace("&", "&amp;")
-                        if self.escape_lt_in_attrs: v = v.replace("<", "&lt;")
+                        if self.escape_lt_in_attrs:
+                            v = v.replace("<", "&lt;")
                         if quote_attr:
                             quote_char = self.quote_char
                             if self.use_best_quote_char:
@@ -307,6 +308,7 @@ def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
         if self.strict:
             raise SerializeError
 
+
 def SerializeError(Exception):
     """Error in serialized tree"""
     pass
diff --git a/html5lib/tests/mockParser.py b/html5lib/tests/mockParser.py
index 83c62bb8..1b17e423 100644
--- a/html5lib/tests/mockParser.py
+++ b/html5lib/tests/mockParser.py
@@ -4,12 +4,13 @@
 import os
 
 if __name__ == '__main__':
-    #Allow us to import from the src directory
+    # Allow us to import from the src directory
     os.chdir(os.path.split(os.path.abspath(__file__))[0])
     sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
 
 from tokenizer import HTMLTokenizer
 
+
 class HTMLParser(object):
     """ Fake parser to test tokenizer output """
     def parse(self, stream, output=True):
@@ -22,7 +23,8 @@ def parse(self, stream, output=True):
     x = HTMLParser()
     if len(sys.argv) > 1:
         if len(sys.argv) > 2:
-            import hotshot, hotshot.stats
+            import hotshot
+            import hotshot.stats
             prof = hotshot.Profile('stats.prof')
             prof.runcall(x.parse, sys.argv[1], False)
             prof.close()
diff --git a/html5lib/tests/performance/concatenation.py b/html5lib/tests/performance/concatenation.py
index f9ff754f..a1465036 100755
--- a/html5lib/tests/performance/concatenation.py
+++ b/html5lib/tests/performance/concatenation.py
@@ -1,23 +1,27 @@
 from __future__ import absolute_import, division, unicode_literals
 
+
 def f1():
     x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     x += y + z
 
+
 def f2():
     x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     x = x + y + z
 
+
 def f3():
     x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     x = "".join((x, y, z))
 
+
 def f4():
     x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
     y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
index 3dcdc39b..8bf6d4e3 100644
--- a/html5lib/tests/support.py
+++ b/html5lib/tests/support.py
@@ -15,11 +15,11 @@
 from html5lib import treebuilders
 del base_path
 
-#Build a dict of avaliable trees
-treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
-             "DOM":treebuilders.getTreeBuilder("dom")}
+# Build a dict of avaliable trees
+treeTypes = {"simpletree": treebuilders.getTreeBuilder("simpletree"),
+             "DOM": treebuilders.getTreeBuilder("dom")}
 
-#Try whatever etree implementations are avaliable from a list that are
+# Try whatever etree implementations are avaliable from a list that are
 #"supposed" to work
 try:
     import xml.etree.ElementTree as ElementTree
@@ -42,14 +42,16 @@
         pass
 
 try:
-    import lxml.etree as lxml # flake8: noqa
+    import lxml.etree as lxml  # flake8: noqa
 except ImportError:
     pass
 else:
     treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
 
+
 def get_data_files(subdirectory, files='*.dat'):
-    return glob.glob(os.path.join(test_dir,subdirectory,files))
+    return glob.glob(os.path.join(test_dir, subdirectory, files))
+
 
 class DefaultDict(dict):
     def __init__(self, default, *args, **kwargs):
@@ -59,6 +61,7 @@ def __init__(self, default, *args, **kwargs):
     def __getitem__(self, key):
         return dict.get(self, key, self.default)
 
+
 class TestData(object):
     def __init__(self, filename, newTestHeading="data", encoding="utf8"):
         if encoding == None:
@@ -73,17 +76,17 @@ def __del__(self):
 
     def __iter__(self):
         data = DefaultDict(None)
-        key=None
+        key = None
         for line in self.f:
             heading = self.isSectionHeading(line)
             if heading:
                 if data and heading == self.newTestHeading:
-                    #Remove trailing newline
+                    # Remove trailing newline
                     data[key] = data[key][:-1]
                     yield self.normaliseOutput(data)
                     data = DefaultDict(None)
                 key = heading
-                data[key]="" if self.encoding else b""
+                data[key] = "" if self.encoding else b""
             elif key is not None:
                 data[key] += line
         if data:
@@ -92,19 +95,20 @@ def __iter__(self):
     def isSectionHeading(self, line):
         """If the current heading is a test section heading return the heading,
         otherwise return False"""
-        #print(line)
+        # print(line)
         if line.startswith("#" if self.encoding else b"#"):
             return line[1:].strip()
         else:
             return False
 
     def normaliseOutput(self, data):
-        #Remove trailing newlines
-        for key,value in data.items():
+        # Remove trailing newlines
+        for key, value in data.items():
             if value.endswith("\n" if self.encoding else b"\n"):
                 data[key] = value[:-1]
         return data
 
+
 def convert(stripChars):
     def convertData(data):
         """convert the output of str(document) to the format used in the testcases"""
@@ -120,6 +124,7 @@ def convertData(data):
 
 convertExpected = convert(2)
 
+
 def errorMessage(input, expected, actual):
     msg = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" %
            (repr(input), repr(expected), repr(actual)))
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
index 769e5a55..df957eb1 100644
--- a/html5lib/tests/test_encoding.py
+++ b/html5lib/tests/test_encoding.py
@@ -11,6 +11,7 @@
 from .support import get_data_files, TestData, test_dir, errorMessage
 from html5lib import HTMLParser, inputstream
 
+
 class Html5EncodingTestCase(unittest.TestCase):
     def test_codec_name_a(self):
         self.assertEqual(inputstream.codecName("utf-8"), "utf-8")
@@ -24,6 +25,7 @@ def test_codec_name_c(self):
     def test_codec_name_d(self):
         self.assertEqual(inputstream.codecName("ISO_8859--1"), "windows-1252")
 
+
 def runParserEncodingTest(data, encoding):
     p = HTMLParser()
     p.parse(data, useChardet=False)
@@ -31,6 +33,7 @@ def runParserEncodingTest(data, encoding):
 
     assert encoding == p.tokenizer.stream.charEncoding[0], errorMessage(data, encoding, p.tokenizer.stream.charEncoding[0])
 
+
 def runPreScanEncodingTest(data, encoding):
     stream = inputstream.HTMLBinaryInputStream(data, chardet=False)
     encoding = encoding.lower().decode("ascii")
@@ -41,6 +44,7 @@ def runPreScanEncodingTest(data, encoding):
 
     assert encoding == stream.charEncoding[0], errorMessage(data, encoding, stream.charEncoding[0])
 
+
 def test_encoding():
     for filename in get_data_files("encoding"):
         tests = TestData(filename, b"data", encoding=None)
@@ -49,11 +53,11 @@ def test_encoding():
             yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])
 
 try:
-    import chardet # flake8: noqa
+    import chardet  # flake8: noqa
 except ImportError:
     print("chardet not found, skipping chardet tests")
 else:
     def test_chardet():
-        data = open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt"), "rb").read()
+        data = open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb").read()
         encoding = inputstream.HTMLInputStream(data).charEncoding
         assert encoding[0].lower() == "big5"
diff --git a/html5lib/tests/test_parser.py b/html5lib/tests/test_parser.py
index ae5b87fd..ce117148 100644
--- a/html5lib/tests/test_parser.py
+++ b/html5lib/tests/test_parser.py
@@ -12,11 +12,13 @@
 from .support import TestData, convert, convertExpected, treeTypes
 from html5lib import html5parser, constants
 
-#Run the parse error checks
+# Run the parse error checks
 checkParseErrors = False
 
-#XXX - There should just be one function here but for some reason the testcase
-#format differs from the treedump format by a single space character
+# XXX - There should just be one function here but for some reason the testcase
+# format differs from the treedump format by a single space character
+
+
 def convertTreeDump(data):
     return "\n".join(convert(3)(data).split("\n")[1:])
 
@@ -27,10 +29,10 @@ def runParserTest(innerHTML, input, expected, errors, treeClass,
                   namespaceHTMLElements):
     warnings.resetwarnings()
     warnings.simplefilter("error")
-    #XXX - move this out into the setup function
-    #concatenate all consecutive character tokens into a single token
+    # XXX - move this out into the setup function
+    # concatenate all consecutive character tokens into a single token
     try:
-        p = html5parser.HTMLParser(tree = treeClass,
+        p = html5parser.HTMLParser(tree=treeClass,
                                    namespaceHTMLElements=namespaceHTMLElements)
     except constants.DataLossWarning:
         return
@@ -45,7 +47,7 @@ def runParserTest(innerHTML, input, expected, errors, treeClass,
                 return
     except:
         errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
-                               "\nTraceback:", traceback.format_exc()])
+                              "\nTraceback:", traceback.format_exc()])
         assert False, errorMsg
 
     output = convertTreeDump(p.tree.testSerializer(document))
@@ -55,7 +57,7 @@ def runParserTest(innerHTML, input, expected, errors, treeClass,
         expected = namespaceExpected(r"\1<html \2>", expected)
 
     errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
-                           "\nReceived:", output])
+                          "\nReceived:", output])
     assert expected == output, errorMsg
 
     errStr = []
@@ -65,17 +67,18 @@ def runParserTest(innerHTML, input, expected, errors, treeClass,
                                                constants.E[errorcode] % datavars))
 
     errorMsg2 = "\n".join(["\n\nInput:", input,
-                            "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
-                            "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
+                           "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
+                           "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
     if checkParseErrors:
             assert len(p.errors) == len(errors), errorMsg2
 
+
 def test_parser():
-    sys.stderr.write('Testing tree builders '+ " ".join(list(treeTypes.keys())) + "\n")
+    sys.stderr.write('Testing tree builders ' + " ".join(list(treeTypes.keys())) + "\n")
     files = get_data_files('tree-construction')
 
     for filename in files:
-        testName = os.path.basename(filename).replace(".dat","")
+        testName = os.path.basename(filename).replace(".dat", "")
         if testName in ("main-element", "template"):
             continue
 
@@ -83,9 +86,9 @@ def test_parser():
 
         for index, test in enumerate(tests):
             input, errors, innerHTML, expected = [test[key] for key in
-                                                      ('data', 'errors',
-                                                      'document-fragment',
-                                                      'document')]
+                                                  ('data', 'errors',
+                                                   'document-fragment',
+                                                   'document')]
             if errors:
                 errors = errors.split("\n")
 
diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
index a3a58a2b..1045bcdc 100755
--- a/html5lib/tests/test_parser2.py
+++ b/html5lib/tests/test_parser2.py
@@ -2,7 +2,7 @@
 
 import io
 
-from . import support # flake8: noqa
+from . import support  # flake8: noqa
 from html5lib import html5parser
 from html5lib.constants import namespaces
 from html5lib.treebuilders import dom
@@ -10,34 +10,38 @@
 import unittest
 
 # tests that aren't autogenerated from text files
+
+
 class MoreParserTests(unittest.TestCase):
 
-  def test_assertDoctypeCloneable(self):
-    parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
-    doc = parser.parse('<!DOCTYPE HTML>')
-    self.assert_(doc.cloneNode(True))
+    def test_assertDoctypeCloneable(self):
+        parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
+        doc = parser.parse('<!DOCTYPE HTML>')
+        self.assert_(doc.cloneNode(True))
 
-  def test_line_counter(self):
-    # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
-    parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
-    parser.parse("<pre>\nx\n&gt;\n</pre>")
+    def test_line_counter(self):
+        # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
+        parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
+        parser.parse("<pre>\nx\n&gt;\n</pre>")
 
-  def test_namespace_html_elements_0(self):
-    parser = html5parser.HTMLParser(namespaceHTMLElements=True)
-    doc = parser.parse("<html></html>")
-    self.assert_(doc.childNodes[0].namespace == namespaces["html"])
+    def test_namespace_html_elements_0(self):
+        parser = html5parser.HTMLParser(namespaceHTMLElements=True)
+        doc = parser.parse("<html></html>")
+        self.assert_(doc.childNodes[0].namespace == namespaces["html"])
 
-  def test_namespace_html_elements_1(self):
-    parser = html5parser.HTMLParser(namespaceHTMLElements=False)
-    doc = parser.parse("<html></html>")
-    self.assert_(doc.childNodes[0].namespace == None)
+    def test_namespace_html_elements_1(self):
+        parser = html5parser.HTMLParser(namespaceHTMLElements=False)
+        doc = parser.parse("<html></html>")
+        self.assert_(doc.childNodes[0].namespace == None)
+
+    def test_unicode_file(self):
+        parser = html5parser.HTMLParser()
+        doc = parser.parse(io.StringIO("a"))
 
-  def test_unicode_file(self):
-    parser = html5parser.HTMLParser()
-    doc = parser.parse(io.StringIO("a"))
 
 def buildTestSuite():
-  return unittest.defaultTestLoader.loadTestsFromName(__name__)
+    return unittest.defaultTestLoader.loadTestsFromName(__name__)
+
 
 def main():
     buildTestSuite()
diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py
index ab5de5fe..5bd083fe 100644
--- a/html5lib/tests/test_sanitizer.py
+++ b/html5lib/tests/test_sanitizer.py
@@ -7,68 +7,74 @@
 
 from html5lib import html5parser, sanitizer, constants
 
+
 def runSanitizerTest(name, expected, input):
     expected = ''.join([token.toxml() for token in html5parser.HTMLParser().
-                         parseFragment(expected).childNodes])
+                        parseFragment(expected).childNodes])
     expected = json.loads(json.dumps(expected))
     assert expected == sanitize_html(input)
 
+
 def sanitize_html(stream):
     return ''.join([token.toxml() for token in
                     html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
-                     parseFragment(stream).childNodes])
+                    parseFragment(stream).childNodes])
+
 
 def test_should_handle_astral_plane_characters():
     assert "<p>\U0001d4b5 \U0001d538</p>" == sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
 
+
 def test_sanitizer():
     for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
         if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']:
-            continue ### TODO
+            continue  # TODO
         if tag_name != tag_name.lower():
-            continue ### TODO
+            continue  # TODO
         if tag_name == 'image':
             yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
-              "<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
-              "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+                   "<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
+                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
         elif tag_name == 'br':
             yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
-              "<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
-              "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+                   "<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
+                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
         elif tag_name in constants.voidElements:
             yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
-              "<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
-              "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+                   "<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
+                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
         else:
             yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
-              "<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name,tag_name),
-              "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+                   "<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
+                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
 
     for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
         tag_name = tag_name.upper()
         yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name,
-          "&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;" % (tag_name,tag_name),
-          "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+               "&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;" % (tag_name, tag_name),
+               "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
 
     for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
-        if attribute_name != attribute_name.lower(): continue ### TODO
-        if attribute_name == 'style': continue
+        if attribute_name != attribute_name.lower():
+            continue  # TODO
+        if attribute_name == 'style':
+            continue
         yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
-          "<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % attribute_name,
-          "<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name)
+               "<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % attribute_name,
+               "<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name)
 
     for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
         attribute_name = attribute_name.upper()
         yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name,
-          "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
-          "<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name)
+               "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
+               "<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name)
 
     for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
         yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol,
-          "<a href=\"%s\">foo</a>" % protocol,
-          """<a href="%s">foo</a>""" % protocol)
+               "<a href=\"%s\">foo</a>" % protocol,
+               """<a href="%s">foo</a>""" % protocol)
 
     for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
         yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
-          "<a href=\"%s\">foo</a>" % protocol,
-        """<a href="%s">foo</a>""" % protocol)
+               "<a href=\"%s\">foo</a>" % protocol,
+               """<a href="%s">foo</a>""" % protocol)
diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
index 25eee1f0..adebf609 100644
--- a/html5lib/tests/test_serializer.py
+++ b/html5lib/tests/test_serializer.py
@@ -27,6 +27,7 @@
 
 default_namespace = constants.namespaces["html"]
 
+
 class JsonWalker(TreeWalker):
     def __iter__(self):
         for token in self.tree:
@@ -82,8 +83,9 @@ def _convertAttrib(self, attribs):
 
 
 def serialize_html(input, options):
-    options = dict([(str(k),v) for k,v in options.items()])
-    return serializer.HTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None))
+    options = dict([(str(k), v) for k, v in options.items()])
+    return serializer.HTMLSerializer(**options).render(JsonWalker(input), options.get("encoding", None))
+
 
 def runSerializerTest(input, expected, options):
     encoding = options.get("encoding", None)
@@ -94,7 +96,7 @@ def runSerializerTest(input, expected, options):
 
     result = serialize_html(input, options)
     if len(expected) == 1:
-        assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s"%(expected[0], result, str(options))
+        assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
     elif result not in expected:
         assert False, "Expected: %s, Received: %s" % (expected, result)
 
@@ -150,23 +152,24 @@ def setUp(self):
 
         def testEntityReplacement(self):
             doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
-            tree = etree.fromstring(doc, parser = self.parser).getroottree()
+            tree = etree.fromstring(doc, parser=self.parser).getroottree()
             result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
             self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result)
 
         def testEntityXML(self):
             doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>"""
-            tree = etree.fromstring(doc, parser = self.parser).getroottree()
+            tree = etree.fromstring(doc, parser=self.parser).getroottree()
             result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
             self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""", result)
 
         def testEntityNoResolve(self):
             doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
-            tree = etree.fromstring(doc, parser = self.parser).getroottree()
+            tree = etree.fromstring(doc, parser=self.parser).getroottree()
             result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False,
                                           resolve_entities=False)
             self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result)
 
+
 def test_serializer():
     for filename in get_data_files('serializer', '*.test'):
         with open(filename) as fp:
diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
index cd4a8132..52ee03e7 100755
--- a/html5lib/tests/test_stream.py
+++ b/html5lib/tests/test_stream.py
@@ -1,16 +1,20 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from . import support # flake8: noqa
-import unittest, codecs
+from . import support  # flake8: noqa
+import unittest
+import codecs
 
 from html5lib.inputstream import HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream
 
+
 class HTMLUnicodeInputStreamShortChunk(HTMLUnicodeInputStream):
     _defaultChunkSize = 2
 
+
 class HTMLBinaryInputStreamShortChunk(HTMLBinaryInputStream):
     _defaultChunkSize = 2
 
+
 class HTMLInputStreamTest(unittest.TestCase):
 
     def test_char_ascii(self):
@@ -36,7 +40,7 @@ def test_bom(self):
         self.assertEquals(stream.char(), "'")
 
     def test_utf_16(self):
-        stream = HTMLInputStream((' '*1025).encode('utf-16'))
+        stream = HTMLInputStream((' ' * 1025).encode('utf-16'))
         self.assert_(stream.charEncoding[0] in ['utf-16-le', 'utf-16-be'], stream.charEncoding)
         self.assertEquals(len(stream.charsUntil(' ', True)), 1025)
 
@@ -87,9 +91,11 @@ def test_position2(self):
         self.assertEquals(stream.char(), "d")
         self.assertEquals(stream.position(), (2, 1))
 
+
 def buildTestSuite():
     return unittest.defaultTestLoader.loadTestsFromName(__name__)
 
+
 def main():
     buildTestSuite()
     unittest.main()
diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py
index ddbdf03b..2642e719 100644
--- a/html5lib/tests/test_tokenizer.py
+++ b/html5lib/tests/test_tokenizer.py
@@ -1,7 +1,6 @@
 from __future__ import absolute_import, division, unicode_literals
 
 
-
 import warnings
 import re
 
@@ -14,6 +13,7 @@
 from html5lib.tokenizer import HTMLTokenizer
 from html5lib import constants
 
+
 class TokenizerTestParser(object):
     def __init__(self, initialState, lastStartTag=None):
         self.tokenizer = HTMLTokenizer
@@ -27,9 +27,9 @@ def parse(self, stream, encoding=None, innerHTML=False):
         tokenizer.state = getattr(tokenizer, self._state)
         if self._lastStartTag is not None:
             tokenizer.currentToken = {"type": "startTag",
-                                      "name":self._lastStartTag}
+                                      "name": self._lastStartTag}
 
-        types = dict((v,k) for k,v in constants.tokenTypes.items())
+        types = dict((v, k) for k, v in constants.tokenTypes.items())
         for token in tokenizer:
             getattr(self, 'process%s' % types[token["type"]])(token)
 
@@ -68,12 +68,13 @@ def processEOF(self, token):
     def processParseError(self, token):
         self.outputTokens.append(["ParseError", token["data"]])
 
+
 def concatenateCharacterTokens(tokens):
     outputTokens = []
     for token in tokens:
         if not "ParseError" in token and token[0] == "Character":
             if (outputTokens and not "ParseError" in outputTokens[-1] and
-                outputTokens[-1][0] == "Character"):
+                    outputTokens[-1][0] == "Character"):
                 outputTokens[-1][1] += token[1]
             else:
                 outputTokens.append(token)
@@ -81,6 +82,7 @@ def concatenateCharacterTokens(tokens):
             outputTokens.append(token)
     return outputTokens
 
+
 def normalizeTokens(tokens):
     # TODO: convert tests to reflect arrays
     for i, token in enumerate(tokens):
@@ -88,6 +90,7 @@ def normalizeTokens(tokens):
             tokens[i] = token[0]
     return tokens
 
+
 def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
                 ignoreErrors=False):
     """Test whether the test has passed or failed
@@ -95,10 +98,10 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
     If the ignoreErrorOrder flag is set to true we don't test the relative
     positions of parse errors and non parse errors
     """
-    checkSelfClosing= False
+    checkSelfClosing = False
     for token in expectedTokens:
         if (token[0] == "StartTag" and len(token) == 4
-            or token[0] == "EndTag" and len(token) == 3):
+                or token[0] == "EndTag" and len(token) == 3):
             checkSelfClosing = True
             break
 
@@ -110,10 +113,10 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
     if not ignoreErrorOrder and not ignoreErrors:
         return expectedTokens == receivedTokens
     else:
-        #Sort the tokens into two groups; non-parse errors and parse errors
-        tokens = {"expected":[[],[]], "received":[[],[]]}
+        # Sort the tokens into two groups; non-parse errors and parse errors
+        tokens = {"expected": [[], []], "received": [[], []]}
         for tokenType, tokenList in zip(list(tokens.keys()),
-                                         (expectedTokens, receivedTokens)):
+                                       (expectedTokens, receivedTokens)):
             for token in tokenList:
                 if token != "ParseError":
                     tokens[tokenType][0].append(token)
@@ -122,6 +125,7 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
                         tokens[tokenType][1].append(token)
         return tokens["expected"] == tokens["received"]
 
+
 def unescape(test):
     def decode(inp):
         return inp.encode("utf-8").decode("unicode-escape")
@@ -138,6 +142,7 @@ def decode(inp):
                     token[2][decode(key)] = decode(value)
     return test
 
+
 def runTokenizerTest(test):
     warnings.resetwarnings()
     warnings.simplefilter("error")
@@ -151,7 +156,7 @@ def runTokenizerTest(test):
     tokens = concatenateCharacterTokens(tokens)
     received = normalizeTokens(tokens)
     errorMsg = "\n".join(["\n\nInitial state:",
-                          test['initialState'] ,
+                          test['initialState'],
                           "\nInput:", test['input'],
                           "\nExpected:", repr(expected),
                           "\nreceived:", repr(tokens)])
@@ -159,22 +164,25 @@ def runTokenizerTest(test):
     ignoreErrorOrder = test.get('ignoreErrorOrder', False)
     assert tokensMatch(expected, received, ignoreErrorOrder, True), errorMsg
 
+
 def _doCapitalize(match):
     return match.group(1).upper()
 
 _capitalizeRe = re.compile(r"\W+(\w)").sub
 
+
 def capitalize(s):
     s = s.lower()
     s = _capitalizeRe(_doCapitalize, s)
     return s
 
+
 def testTokenizer():
     for filename in get_data_files('tokenizer', '*.test'):
         with open(filename) as fp:
             tests = json.load(fp)
             if 'tests' in tests:
-                for index,test in enumerate(tests['tests']):
+                for index, test in enumerate(tests['tests']):
                     if 'initialStates' not in test:
                         test["initialStates"] = ["Data state"]
                     if 'doubleEscaped' in test:
diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index a09dde7a..039bf3d9 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -15,6 +15,7 @@
 
 from html5lib import html5parser, treewalkers, treebuilders, constants
 
+
 def PullDOMAdapter(node):
     from xml.dom import Node
     from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, COMMENT, CHARACTERS
@@ -44,16 +45,16 @@ def PullDOMAdapter(node):
         raise NotImplementedError("Node type not supported: " + str(node.nodeType))
 
 treeTypes = {
-"simpletree":  {"builder": treebuilders.getTreeBuilder("simpletree"),
-                "walker":  treewalkers.getTreeWalker("simpletree")},
-"DOM":         {"builder": treebuilders.getTreeBuilder("dom"),
-                "walker":  treewalkers.getTreeWalker("dom")},
-"PullDOM":     {"builder": treebuilders.getTreeBuilder("dom"),
-                "adapter": PullDOMAdapter,
-                "walker":  treewalkers.getTreeWalker("pulldom")},
+    "simpletree": {"builder": treebuilders.getTreeBuilder("simpletree"),
+                   "walker": treewalkers.getTreeWalker("simpletree")},
+    "DOM": {"builder": treebuilders.getTreeBuilder("dom"),
+            "walker": treewalkers.getTreeWalker("dom")},
+    "PullDOM": {"builder": treebuilders.getTreeBuilder("dom"),
+                "adapter": PullDOMAdapter,
+                "walker": treewalkers.getTreeWalker("pulldom")},
 }
 
-#Try whatever etree implementations are available from a list that are
+# Try whatever etree implementations are available from a list that are
 #"supposed" to work
 try:
     import xml.etree.ElementTree as ElementTree
@@ -62,7 +63,7 @@ def PullDOMAdapter(node):
 else:
     treeTypes['ElementTree'] = \
         {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
-         "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
+         "walker": treewalkers.getTreeWalker("etree", ElementTree)}
 
 try:
     import xml.etree.cElementTree as ElementTree
@@ -71,11 +72,11 @@ def PullDOMAdapter(node):
 else:
     treeTypes['cElementTree'] = \
         {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
-         "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
+         "walker": treewalkers.getTreeWalker("etree", ElementTree)}
 
 
 try:
-    import lxml.etree as ElementTree # flake8: noqa
+    import lxml.etree as ElementTree  # flake8: noqa
 except ImportError:
     pass
 else:
@@ -84,16 +85,16 @@ def PullDOMAdapter(node):
 #         "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
     treeTypes['lxml_native'] = \
         {"builder": treebuilders.getTreeBuilder("lxml"),
-         "walker":  treewalkers.getTreeWalker("lxml")}
+         "walker": treewalkers.getTreeWalker("lxml")}
 
 
-#Try whatever etree implementations are available from a list that are
+# Try whatever DOM implementations are available from a list that are
 #"supposed" to work
 try:
     import pxdom
     treeTypes['pxdom'] = \
         {"builder": treebuilders.getTreeBuilder("dom", pxdom),
-         "walker":  treewalkers.getTreeWalker("dom")}
+         "walker": treewalkers.getTreeWalker("dom")}
 except ImportError:
     pass
 
@@ -141,18 +142,19 @@ def GenshiAdapter(tree):
                                 token["systemId"]), (None, -1, -1)
 
             else:
-                pass # FIXME: What to do?
+                pass  # FIXME: What to do?
 
         if text is not None:
             yield TEXT, text, (None, -1, -1)
 
     treeTypes["genshi"] = \
-       {"builder": treebuilders.getTreeBuilder("simpletree"),
-        "adapter": GenshiAdapter,
-        "walker":  treewalkers.getTreeWalker("genshi")}
+        {"builder": treebuilders.getTreeBuilder("simpletree"),
+         "adapter": GenshiAdapter,
+         "walker": treewalkers.getTreeWalker("genshi")}
 except ImportError:
     pass
 
+
 def concatenateCharacterTokens(tokens):
     charactersToken = None
     for token in tokens:
@@ -170,6 +172,7 @@ def concatenateCharacterTokens(tokens):
     if charactersToken is not None:
         yield charactersToken
 
+
 def convertTokens(tokens):
     output = []
     indent = 0
@@ -177,7 +180,7 @@ def convertTokens(tokens):
         type = token["type"]
         if type in ("StartTag", "EmptyTag"):
             if (token["namespace"] and
-                token["namespace"] != constants.namespaces["html"]):
+                    token["namespace"] != constants.namespaces["html"]):
                 if token["namespace"] in constants.prefixes:
                     name = constants.prefixes[token["namespace"]]
                 else:
@@ -185,12 +188,12 @@ def convertTokens(tokens):
                 name += " " + token["name"]
             else:
                 name = token["name"]
-            output.append("%s<%s>" % (" "*indent, name))
+            output.append("%s<%s>" % (" " * indent, name))
             indent += 2
             attrs = token["data"]
             if attrs:
-                #TODO: Remove this if statement, attrs should always exist
-                for (namespace,name),value in sorted(attrs.items()):
+                # TODO: Remove this if statement, attrs should always exist
+                for (namespace, name), value in sorted(attrs.items()):
                     if namespace:
                         if namespace in constants.prefixes:
                             outputname = constants.prefixes[namespace]
@@ -199,41 +202,43 @@ def convertTokens(tokens):
                         outputname += " " + name
                     else:
                         outputname = name
-                    output.append("%s%s=\"%s\"" % (" "*indent, outputname, value))
+                    output.append("%s%s=\"%s\"" % (" " * indent, outputname, value))
             if type == "EmptyTag":
                 indent -= 2
         elif type == "EndTag":
             indent -= 2
         elif type == "Comment":
-            output.append("%s<!-- %s -->" % (" "*indent, token["data"]))
+            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
         elif type == "Doctype":
             if token["name"]:
                 if token["publicId"]:
-                    output.append("""%s<!DOCTYPE %s "%s" "%s">"""%
-                                  (" "*indent, token["name"],
+                    output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
+                                  (" " * indent, token["name"],
                                    token["publicId"],
                                    token["systemId"] and token["systemId"] or ""))
                 elif token["systemId"]:
-                    output.append("""%s<!DOCTYPE %s "" "%s">"""%
-                                  (" "*indent, token["name"],
+                    output.append("""%s<!DOCTYPE %s "" "%s">""" %
+                                  (" " * indent, token["name"],
                                    token["systemId"]))
                 else:
-                    output.append("%s<!DOCTYPE %s>"%(" "*indent,
-                                                     token["name"]))
+                    output.append("%s<!DOCTYPE %s>" % (" " * indent,
+                                                       token["name"]))
             else:
-                output.append("%s<!DOCTYPE >" % (" "*indent,))
+                output.append("%s<!DOCTYPE >" % (" " * indent,))
         elif type in ("Characters", "SpaceCharacters"):
-            output.append("%s\"%s\"" % (" "*indent, token["data"]))
+            output.append("%s\"%s\"" % (" " * indent, token["data"]))
         else:
-            pass # TODO: what to do with errors?
+            pass  # TODO: what to do with errors?
     return "\n".join(output)
 
 import re
-attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+",re.M)
+attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
+
+
 def sortattrs(x):
-  lines = x.group(0).split("\n")
-  lines.sort()
-  return "\n".join(lines)
+    lines = x.group(0).split("\n")
+    lines.sort()
+    return "\n".join(lines)
 
 
 class TokenTestCase(unittest.TestCase):
@@ -250,26 +255,27 @@ def test_all_tokens(self):
             {'data': 'c', 'type': 'Characters'},
             {'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
             {'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
-            ]
+        ]
         for treeName, treeCls in treeTypes.items():
-            p = html5parser.HTMLParser(tree = treeCls["builder"])
+            p = html5parser.HTMLParser(tree=treeCls["builder"])
             document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
             document = treeCls.get("adapter", lambda x: x)(document)
             output = treeCls["walker"](document)
             for expectedToken, outputToken in zip(expected, output):
                 self.assertEqual(expectedToken, outputToken)
 
+
 def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
     warnings.resetwarnings()
     warnings.simplefilter("error")
     try:
-        p = html5parser.HTMLParser(tree = treeClass["builder"])
+        p = html5parser.HTMLParser(tree=treeClass["builder"])
         if innerHTML:
             document = p.parseFragment(input, innerHTML)
         else:
             document = p.parse(input)
     except constants.DataLossWarning:
-        #Ignore testcases we know we don't pass
+        # Ignore testcases we know we don't pass
         return
 
     document = treeClass.get("adapter", lambda x: x)(document)
@@ -281,21 +287,22 @@ def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
                                     [line + "\n" for line in output.splitlines()],
                                     "Expected", "Received"))
         assert expected == output, "\n".join([
-                "", "Input:", input,
-                "", "Expected:", expected,
-                "", "Received:", output,
-                "", "Diff:", diff,
-                ])
+            "", "Input:", input,
+            "", "Expected:", expected,
+            "", "Received:", output,
+            "", "Diff:", diff,
+        ])
     except NotImplementedError:
-        pass # Amnesty for those that confess...
+        pass  # Amnesty for those that confess...
+
 
 def test_treewalker():
-    sys.stdout.write('Testing tree walkers '+ " ".join(list(treeTypes.keys())) + "\n")
+    sys.stdout.write('Testing tree walkers ' + " ".join(list(treeTypes.keys())) + "\n")
 
     for treeName, treeCls in treeTypes.items():
         files = get_data_files('tree-construction')
         for filename in files:
-            testName = os.path.basename(filename).replace(".dat","")
+            testName = os.path.basename(filename).replace(".dat", "")
             if testName in ("main-element", "template"):
                 continue
 
@@ -308,5 +315,3 @@ def test_treewalker():
                                                                "document")]
                 errors = errors.split("\n")
                 yield runTreewalkerTest, innerHTML, input, expected, errors, treeCls
-
-
diff --git a/html5lib/tests/test_whitespace_filter.py b/html5lib/tests/test_whitespace_filter.py
index 9525d41b..9ed27fd6 100644
--- a/html5lib/tests/test_whitespace_filter.py
+++ b/html5lib/tests/test_whitespace_filter.py
@@ -11,6 +11,7 @@
 except AttributeError:
     unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
 
+
 class TestCase(unittest.TestCase):
     def runTest(self, input, expected):
         output = list(Filter(input))
@@ -24,19 +25,19 @@ def runTestUnmodifiedOutput(self, input):
 
     def testPhrasingElements(self):
         self.runTestUnmodifiedOutput(
-            [{"type": "Characters", "data": "This is a " },
-             {"type": "StartTag", "name": "span", "data": [] },
-             {"type": "Characters", "data": "phrase" },
+            [{"type": "Characters", "data": "This is a "},
+             {"type": "StartTag", "name": "span", "data": []},
+             {"type": "Characters", "data": "phrase"},
              {"type": "EndTag", "name": "span", "data": []},
-             {"type": "SpaceCharacters", "data": " " },
-             {"type": "Characters", "data": "with" },
-             {"type": "SpaceCharacters", "data": " " },
-             {"type": "StartTag", "name": "em", "data": [] },
-             {"type": "Characters", "data": "emphasised text" },
+             {"type": "SpaceCharacters", "data": " "},
+             {"type": "Characters", "data": "with"},
+             {"type": "SpaceCharacters", "data": " "},
+             {"type": "StartTag", "name": "em", "data": []},
+             {"type": "Characters", "data": "emphasised text"},
              {"type": "EndTag", "name": "em", "data": []},
-             {"type": "Characters", "data": " and an " },
-             {"type": "StartTag", "name": "img", "data": [["alt", "image"]] },
-             {"type": "Characters", "data": "." }])
+             {"type": "Characters", "data": " and an "},
+             {"type": "StartTag", "name": "img", "data": [["alt", "image"]]},
+             {"type": "Characters", "data": "."}])
 
     def testLeadingWhitespace(self):
         self.runTest(
@@ -119,9 +120,11 @@ def testWhitespaceInPre(self):
              {"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
              {"type": "EndTag", "name": "pre", "data": []}])
 
+
 def buildTestSuite():
     return unittest.defaultTestLoader.loadTestsFromName(__name__)
 
+
 def main():
     buildTestSuite()
     unittest.main()
diff --git a/html5lib/tests/tokenizertotree.py b/html5lib/tests/tokenizertotree.py
index 8668561d..27239997 100644
--- a/html5lib/tests/tokenizertotree.py
+++ b/html5lib/tests/tokenizertotree.py
@@ -13,22 +13,24 @@
 
 unnamespaceExpected = re.compile(r"^(\|\s*)<html ([^>]+)>", re.M).sub
 
+
 def main(out_path):
     if not os.path.exists(out_path):
-        sys.stderr.write("Path %s does not exist"%out_path)
+        sys.stderr.write("Path %s does not exist" % out_path)
         sys.exit(1)
 
     for filename in support.get_data_files('tokenizer', '*.test'):
         run_file(filename, out_path)
 
+
 def run_file(filename, out_path):
     try:
         tests_data = json.load(file(filename))
     except ValueError:
-        sys.stderr.write("Failed to load %s\n"%filename)
+        sys.stderr.write("Failed to load %s\n" % filename)
         return
     name = os.path.splitext(os.path.split(filename)[1])[0]
-    output_file = open(os.path.join(out_path, "tokenizer_%s.dat"%name), "w")
+    output_file = open(os.path.join(out_path, "tokenizer_%s.dat" % name), "w")
 
     if 'tests' in tests_data:
         for test_data in tests_data['tests']:
@@ -37,13 +39,14 @@ def run_file(filename, out_path):
 
             for initial_state in test_data["initialStates"]:
                 if initial_state != "Data state":
-                    #don't support this yet
+                    # don't support this yet
                     continue
                 test = make_test(test_data)
                 output_file.write(test)
 
     output_file.close()
 
+
 def make_test(test_data):
     if 'doubleEscaped' in test_data:
         test_data = test_tokenizer.unescape_test(test_data)
@@ -54,8 +57,8 @@ def make_test(test_data):
     rv.append("#errors")
     tree = p.parse(test_data["input"])
     output = p.tree.testSerializer(tree)
-    output  = "\n".join(("| "+ line[3:]) if line.startswith("|  ") else line
-                        for line in output.split("\n"))
+    output = "\n".join(("| " + line[3:]) if line.startswith("|  ") else line
+                       for line in output.split("\n"))
     output = unnamespaceExpected(r"\1<\2>", output)
     rv.append(output.encode("utf8"))
     rv.append("")
diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py
index dd54eb67..fca976c7 100644
--- a/html5lib/tokenizer.py
+++ b/html5lib/tokenizer.py
@@ -20,6 +20,7 @@
 
 entitiesTrie = Trie(entities)
 
+
 class HTMLTokenizer(object):
     """ This class takes care of tokenizing HTML.
 
@@ -39,7 +40,7 @@ def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
         self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet)
         self.parser = parser
 
-        #Perform case conversions?
+        # Perform case conversions?
         self.lowercaseElementName = lowercaseElementName
         self.lowercaseAttrName = lowercaseAttrName
 
@@ -97,20 +98,20 @@ def consumeNumberEntity(self, isHex):
         if charAsInt in replacementCharacters:
             char = replacementCharacters[charAsInt]
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "illegal-codepoint-for-numeric-entity",
-              "datavars": {"charAsInt": charAsInt}})
+                                    "illegal-codepoint-for-numeric-entity",
+                                    "datavars": {"charAsInt": charAsInt}})
         elif ((0xD800 <= charAsInt <= 0xDFFF) or
               (charAsInt > 0x10FFFF)):
             char = "\uFFFD"
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "illegal-codepoint-for-numeric-entity",
-              "datavars": {"charAsInt": charAsInt}})
+                                    "illegal-codepoint-for-numeric-entity",
+                                    "datavars": {"charAsInt": charAsInt}})
         else:
-            #Should speed up this check somehow (e.g. move the set to a constant)
+            # Should speed up this check somehow (e.g. move the set to a constant)
             if ((0x0001 <= charAsInt <= 0x0008) or
                 (0x000E <= charAsInt <= 0x001F) or
-                (0x007F  <= charAsInt <= 0x009F) or
-                (0xFDD0  <= charAsInt <= 0xFDEF) or
+                (0x007F <= charAsInt <= 0x009F) or
+                (0xFDD0 <= charAsInt <= 0xFDEF) or
                 charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE,
                                         0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
                                         0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE,
@@ -122,7 +123,7 @@ def consumeNumberEntity(self, isHex):
                                         0xFFFFF, 0x10FFFE, 0x10FFFF])):
                 self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                         "data":
-                                            "illegal-codepoint-for-numeric-entity",
+                                        "illegal-codepoint-for-numeric-entity",
                                         "datavars": {"charAsInt": charAsInt}})
             try:
                 # Try/except needed as UCS-2 Python builds' unichar only works
@@ -136,7 +137,7 @@ def consumeNumberEntity(self, isHex):
         # invoke parseError on parser.
         if c != ";":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "numeric-entity-without-semicolon"})
+                                    "numeric-entity-without-semicolon"})
             self.stream.unget(c)
 
         return char
@@ -147,7 +148,7 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False):
 
         charStack = [self.stream.char()]
         if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&")
-            or (allowedChar is not None and allowedChar == charStack[0])):
+                or (allowedChar is not None and allowedChar == charStack[0])):
             self.stream.unget(charStack[0])
 
         elif charStack[0] == "#":
@@ -160,14 +161,14 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False):
 
             # charStack[-1] should be the first digit
             if (hex and charStack[-1] in hexDigits) \
-             or (not hex and charStack[-1] in digits):
+                    or (not hex and charStack[-1] in digits):
                 # At least one digit found, so consume the whole number
                 self.stream.unget(charStack[-1])
                 output = self.consumeNumberEntity(hex)
             else:
                 # No digits found
                 self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                    "data": "expected-numeric-entity"})
+                                        "data": "expected-numeric-entity"})
                 self.stream.unget(charStack.pop())
                 output = "&" + "".join(charStack)
 
@@ -195,11 +196,11 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False):
             if entityName is not None:
                 if entityName[-1] != ";":
                     self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                      "named-entity-without-semicolon"})
+                                            "named-entity-without-semicolon"})
                 if (entityName[-1] != ";" and fromAttribute and
                     (charStack[entityLength] in asciiLetters or
                      charStack[entityLength] in digits or
-                    charStack[entityLength] == "=")):
+                     charStack[entityLength] == "=")):
                     self.stream.unget(charStack.pop())
                     output = "&" + "".join(charStack)
                 else:
@@ -208,7 +209,7 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False):
                     output += "".join(charStack[entityLength:])
             else:
                 self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                  "expected-named-entity"})
+                                        "expected-named-entity"})
                 self.stream.unget(charStack.pop())
                 output = "&" + "".join(charStack)
 
@@ -238,17 +239,15 @@ def emitCurrentToken(self):
                 token["name"] = token["name"].translate(asciiUpper2Lower)
             if token["type"] == tokenTypes["EndTag"]:
                 if token["data"]:
-                    self.tokenQueue.append({"type":tokenTypes["ParseError"],
-                                            "data":"attributes-in-end-tag"})
+                    self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                            "data": "attributes-in-end-tag"})
                 if token["selfClosing"]:
-                    self.tokenQueue.append({"type":tokenTypes["ParseError"],
-                                            "data":"self-closing-flag-on-end-tag"})
+                    self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                            "data": "self-closing-flag-on-end-tag"})
         self.tokenQueue.append(token)
         self.state = self.dataState
 
-
     # Below are the various tokenizer states worked out.
-
     def dataState(self):
         data = self.stream.char()
         if data == "&":
@@ -257,7 +256,7 @@ def dataState(self):
             self.state = self.tagOpenState
         elif data == "\u0000":
             self.tokenQueue.append({"type": tokenTypes["ParseError"],
-                                    "data":"invalid-codepoint"})
+                                    "data": "invalid-codepoint"})
             self.tokenQueue.append({"type": tokenTypes["Characters"],
                                     "data": "\u0000"})
         elif data is EOF:
@@ -268,14 +267,14 @@ def dataState(self):
             # state". At that point spaceCharacters are important so they are
             # emitted separately.
             self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
-              data + self.stream.charsUntil(spaceCharacters, True)})
+                                    data + self.stream.charsUntil(spaceCharacters, True)})
             # No need to update lastFourChars here, since the first space will
             # have already been appended to lastFourChars and will have broken
             # any <!-- or --> sequences
         else:
             chars = self.stream.charsUntil(("&", "<", "\u0000"))
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-              data + chars})
+                                    data + chars})
         return True
 
     def entityDataState(self):
@@ -302,14 +301,14 @@ def rcdataState(self):
             # state". At that point spaceCharacters are important so they are
             # emitted separately.
             self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
-              data + self.stream.charsUntil(spaceCharacters, True)})
+                                    data + self.stream.charsUntil(spaceCharacters, True)})
             # No need to update lastFourChars here, since the first space will
             # have already been appended to lastFourChars and will have broken
             # any <!-- or --> sequences
         else:
             chars = self.stream.charsUntil(("&", "<"))
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-              data + chars})
+                                    data + chars})
         return True
 
     def characterReferenceInRcdata(self):
@@ -332,7 +331,7 @@ def rawtextState(self):
         else:
             chars = self.stream.charsUntil(("<", "\u0000"))
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-              data + chars})
+                                    data + chars})
         return True
 
     def scriptDataState(self):
@@ -350,7 +349,7 @@ def scriptDataState(self):
         else:
             chars = self.stream.charsUntil(("<", "\u0000"))
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-              data + chars})
+                                    data + chars})
         return True
 
     def plaintextState(self):
@@ -384,20 +383,20 @@ def tagOpenState(self):
             # XXX In theory it could be something besides a tag name. But
             # do we really care?
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-tag-name-but-got-right-bracket"})
+                                    "expected-tag-name-but-got-right-bracket"})
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"})
             self.state = self.dataState
         elif data == "?":
             # XXX In theory it could be something besides a tag name. But
             # do we really care?
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-tag-name-but-got-question-mark"})
+                                    "expected-tag-name-but-got-question-mark"})
             self.stream.unget(data)
             self.state = self.bogusCommentState
         else:
             # XXX
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-tag-name"})
+                                    "expected-tag-name"})
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"})
             self.stream.unget(data)
             self.state = self.dataState
@@ -407,22 +406,22 @@ def closeTagOpenState(self):
         data = self.stream.char()
         if data in asciiLetters:
             self.currentToken = {"type": tokenTypes["EndTag"], "name": data,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.tagNameState
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-closing-tag-but-got-right-bracket"})
+                                    "expected-closing-tag-but-got-right-bracket"})
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-closing-tag-but-got-eof"})
+                                    "expected-closing-tag-but-got-eof"})
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"})
             self.state = self.dataState
         else:
             # XXX data can be _'_...
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-closing-tag-but-got-char",
-              "datavars": {"data": data}})
+                                    "expected-closing-tag-but-got-char",
+                                    "datavars": {"data": data}})
             self.stream.unget(data)
             self.state = self.bogusCommentState
         return True
@@ -435,7 +434,7 @@ def tagNameState(self):
             self.emitCurrentToken()
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-tag-name"})
+                                    "eof-in-tag-name"})
             self.state = self.dataState
         elif data == "/":
             self.state = self.selfClosingStartTagState
@@ -477,17 +476,17 @@ def rcdataEndTagNameState(self):
         if data in spaceCharacters and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.beforeAttributeNameState
         elif data == "/" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.selfClosingStartTagState
         elif data == ">" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.emitCurrentToken()
             self.state = self.dataState
         elif data in asciiLetters:
@@ -527,17 +526,17 @@ def rawtextEndTagNameState(self):
         if data in spaceCharacters and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.beforeAttributeNameState
         elif data == "/" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.selfClosingStartTagState
         elif data == ">" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.emitCurrentToken()
             self.state = self.dataState
         elif data in asciiLetters:
@@ -580,17 +579,17 @@ def scriptDataEndTagNameState(self):
         if data in spaceCharacters and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.beforeAttributeNameState
         elif data == "/" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.selfClosingStartTagState
         elif data == ">" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.emitCurrentToken()
             self.state = self.dataState
         elif data in asciiLetters:
@@ -639,7 +638,7 @@ def scriptDataEscapedState(self):
         else:
             chars = self.stream.charsUntil(("<", "-", "\u0000"))
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data":
-              data + chars})
+                                    data + chars})
         return True
 
     def scriptDataEscapedDashState(self):
@@ -716,17 +715,17 @@ def scriptDataEscapedEndTagNameState(self):
         if data in spaceCharacters and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.beforeAttributeNameState
         elif data == "/" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.state = self.selfClosingStartTagState
         elif data == ">" and appropriate:
             self.currentToken = {"type": tokenTypes["EndTag"],
                                  "name": self.temporaryBuffer,
-                                 "data": [], "selfClosing":False}
+                                 "data": [], "selfClosing": False}
             self.emitCurrentToken()
             self.state = self.dataState
         elif data in asciiLetters:
@@ -769,7 +768,7 @@ def scriptDataDoubleEscapedState(self):
                                     "data": "\uFFFD"})
         elif data == EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-script-in-script"})
+                                    "eof-in-script-in-script"})
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
@@ -791,7 +790,7 @@ def scriptDataDoubleEscapedDashState(self):
             self.state = self.scriptDataDoubleEscapedState
         elif data == EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-script-in-script"})
+                                    "eof-in-script-in-script"})
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
@@ -816,7 +815,7 @@ def scriptDataDoubleEscapedDashDashState(self):
             self.state = self.scriptDataDoubleEscapedState
         elif data == EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-script-in-script"})
+                                    "eof-in-script-in-script"})
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
@@ -863,7 +862,7 @@ def beforeAttributeNameState(self):
             self.state = self.selfClosingStartTagState
         elif data in ("'", '"', "=", "<"):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "invalid-character-in-attribute-name"})
+                                    "invalid-character-in-attribute-name"})
             self.currentToken["data"].append([data, ""])
             self.state = self.attributeNameState
         elif data == "\u0000":
@@ -873,7 +872,7 @@ def beforeAttributeNameState(self):
             self.state = self.attributeNameState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-attribute-name-but-got-eof"})
+                                    "expected-attribute-name-but-got-eof"})
             self.state = self.dataState
         else:
             self.currentToken["data"].append([data, ""])
@@ -888,7 +887,7 @@ def attributeNameState(self):
             self.state = self.beforeAttributeValueState
         elif data in asciiLetters:
             self.currentToken["data"][-1][0] += data +\
-              self.stream.charsUntil(asciiLetters, True)
+                self.stream.charsUntil(asciiLetters, True)
             leavingThisState = False
         elif data == ">":
             # XXX If we emit here the attributes are converted to a dict
@@ -907,7 +906,7 @@ def attributeNameState(self):
         elif data in ("'", '"', "<"):
             self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                     "data":
-                                        "invalid-character-in-attribute-name"})
+                                    "invalid-character-in-attribute-name"})
             self.currentToken["data"][-1][0] += data
             leavingThisState = False
         elif data is EOF:
@@ -928,7 +927,7 @@ def attributeNameState(self):
             for name, value in self.currentToken["data"][:-1]:
                 if self.currentToken["data"][-1][0] == name:
                     self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                      "duplicate-attribute"})
+                                            "duplicate-attribute"})
                     break
             # XXX Fix for above XXX
             if emitToken:
@@ -955,12 +954,12 @@ def afterAttributeNameState(self):
             self.state = self.attributeNameState
         elif data in ("'", '"', "<"):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "invalid-character-after-attribute-name"})
+                                    "invalid-character-after-attribute-name"})
             self.currentToken["data"].append([data, ""])
             self.state = self.attributeNameState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-end-of-tag-but-got-eof"})
+                                    "expected-end-of-tag-but-got-eof"})
             self.state = self.dataState
         else:
             self.currentToken["data"].append([data, ""])
@@ -975,12 +974,12 @@ def beforeAttributeValueState(self):
             self.state = self.attributeValueDoubleQuotedState
         elif data == "&":
             self.state = self.attributeValueUnQuotedState
-            self.stream.unget(data);
+            self.stream.unget(data)
         elif data == "'":
             self.state = self.attributeValueSingleQuotedState
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-attribute-value-but-got-right-bracket"})
+                                    "expected-attribute-value-but-got-right-bracket"})
             self.emitCurrentToken()
         elif data == "\u0000":
             self.tokenQueue.append({"type": tokenTypes["ParseError"],
@@ -989,12 +988,12 @@ def beforeAttributeValueState(self):
             self.state = self.attributeValueUnQuotedState
         elif data in ("=", "<", "`"):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "equals-in-unquoted-attribute-value"})
+                                    "equals-in-unquoted-attribute-value"})
             self.currentToken["data"][-1][1] += data
             self.state = self.attributeValueUnQuotedState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-attribute-value-but-got-eof"})
+                                    "expected-attribute-value-but-got-eof"})
             self.state = self.dataState
         else:
             self.currentToken["data"][-1][1] += data
@@ -1013,11 +1012,11 @@ def attributeValueDoubleQuotedState(self):
             self.currentToken["data"][-1][1] += "\uFFFD"
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-attribute-value-double-quote"})
+                                    "eof-in-attribute-value-double-quote"})
             self.state = self.dataState
         else:
             self.currentToken["data"][-1][1] += data +\
-              self.stream.charsUntil(("\"", "&"))
+                self.stream.charsUntil(("\"", "&"))
         return True
 
     def attributeValueSingleQuotedState(self):
@@ -1032,11 +1031,11 @@ def attributeValueSingleQuotedState(self):
             self.currentToken["data"][-1][1] += "\uFFFD"
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-attribute-value-single-quote"})
+                                    "eof-in-attribute-value-single-quote"})
             self.state = self.dataState
         else:
             self.currentToken["data"][-1][1] += data +\
-              self.stream.charsUntil(("'", "&"))
+                self.stream.charsUntil(("'", "&"))
         return True
 
     def attributeValueUnQuotedState(self):
@@ -1049,7 +1048,7 @@ def attributeValueUnQuotedState(self):
             self.emitCurrentToken()
         elif data in ('"', "'", "=", "<", "`"):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-character-in-unquoted-attribute-value"})
+                                    "unexpected-character-in-unquoted-attribute-value"})
             self.currentToken["data"][-1][1] += data
         elif data == "\u0000":
             self.tokenQueue.append({"type": tokenTypes["ParseError"],
@@ -1057,11 +1056,11 @@ def attributeValueUnQuotedState(self):
             self.currentToken["data"][-1][1] += "\uFFFD"
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-attribute-value-no-quotes"})
+                                    "eof-in-attribute-value-no-quotes"})
             self.state = self.dataState
         else:
             self.currentToken["data"][-1][1] += data + self.stream.charsUntil(
-              frozenset(("&", ">", '"', "'", "=", "<", "`")) | spaceCharacters)
+                frozenset(("&", ">", '"', "'", "=", "<", "`")) | spaceCharacters)
         return True
 
     def afterAttributeValueState(self):
@@ -1074,12 +1073,12 @@ def afterAttributeValueState(self):
             self.state = self.selfClosingStartTagState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-EOF-after-attribute-value"})
+                                    "unexpected-EOF-after-attribute-value"})
             self.stream.unget(data)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-character-after-attribute-value"})
+                                    "unexpected-character-after-attribute-value"})
             self.stream.unget(data)
             self.state = self.beforeAttributeNameState
         return True
@@ -1092,12 +1091,12 @@ def selfClosingStartTagState(self):
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                     "data":
-                                        "unexpected-EOF-after-solidus-in-tag"})
+                                    "unexpected-EOF-after-solidus-in-tag"})
             self.stream.unget(data)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-character-after-solidus-in-tag"})
+                                    "unexpected-character-after-solidus-in-tag"})
             self.stream.unget(data)
             self.state = self.beforeAttributeNameState
         return True
@@ -1109,7 +1108,7 @@ def bogusCommentState(self):
         data = self.stream.charsUntil(">")
         data = data.replace("\u0000", "\uFFFD")
         self.tokenQueue.append(
-          {"type": tokenTypes["Comment"], "data": data})
+            {"type": tokenTypes["Comment"], "data": data})
 
         # Eat the character directly after the bogus comment which is either a
         # ">" or an EOF.
@@ -1155,7 +1154,7 @@ def markupDeclarationOpenState(self):
                 return True
 
         self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-          "expected-dashes-or-doctype"})
+                                "expected-dashes-or-doctype"})
 
         while charStack:
             self.stream.unget(charStack.pop())
@@ -1172,12 +1171,12 @@ def commentStartState(self):
             self.currentToken["data"] += "\uFFFD"
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "incorrect-comment"})
+                                    "incorrect-comment"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-comment"})
+                                    "eof-in-comment"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
@@ -1195,12 +1194,12 @@ def commentStartDashState(self):
             self.currentToken["data"] += "-\uFFFD"
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "incorrect-comment"})
+                                    "incorrect-comment"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-comment"})
+                                    "eof-in-comment"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
@@ -1208,7 +1207,6 @@ def commentStartDashState(self):
             self.state = self.commentState
         return True
 
-
     def commentState(self):
         data = self.stream.char()
         if data == "-":
@@ -1238,7 +1236,7 @@ def commentEndDashState(self):
             self.state = self.commentState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-comment-end-dash"})
+                                    "eof-in-comment-end-dash"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
@@ -1258,21 +1256,21 @@ def commentEndState(self):
             self.state = self.commentState
         elif data == "!":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-bang-after-double-dash-in-comment"})
+                                    "unexpected-bang-after-double-dash-in-comment"})
             self.state = self.commentEndBangState
         elif data == "-":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-             "unexpected-dash-after-double-dash-in-comment"})
+                                    "unexpected-dash-after-double-dash-in-comment"})
             self.currentToken["data"] += data
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-comment-double-dash"})
+                                    "eof-in-comment-double-dash"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             # XXX
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-comment"})
+                                    "unexpected-char-in-comment"})
             self.currentToken["data"] += "--" + data
             self.state = self.commentState
         return True
@@ -1292,7 +1290,7 @@ def commentEndBangState(self):
             self.state = self.commentState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-comment-end-bang-state"})
+                                    "eof-in-comment-end-bang-state"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
@@ -1306,13 +1304,13 @@ def doctypeState(self):
             self.state = self.beforeDoctypeNameState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-doctype-name-but-got-eof"})
+                                    "expected-doctype-name-but-got-eof"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "need-space-after-doctype"})
+                                    "need-space-after-doctype"})
             self.stream.unget(data)
             self.state = self.beforeDoctypeNameState
         return True
@@ -1323,7 +1321,7 @@ def beforeDoctypeNameState(self):
             pass
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-doctype-name-but-got-right-bracket"})
+                                    "expected-doctype-name-but-got-right-bracket"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1334,7 +1332,7 @@ def beforeDoctypeNameState(self):
             self.state = self.doctypeNameState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "expected-doctype-name-but-got-eof"})
+                                    "expected-doctype-name-but-got-eof"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1359,7 +1357,7 @@ def doctypeNameState(self):
             self.state = self.doctypeNameState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype-name"})
+                                    "eof-in-doctype-name"})
             self.currentToken["correct"] = False
             self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
             self.tokenQueue.append(self.currentToken)
@@ -1379,7 +1377,7 @@ def afterDoctypeNameState(self):
             self.currentToken["correct"] = False
             self.stream.unget(data)
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
@@ -1412,8 +1410,8 @@ def afterDoctypeNameState(self):
             # and needs to be ungetted
             self.stream.unget(data)
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-                "expected-space-or-right-bracket-in-doctype", "datavars":
-                {"data": data}})
+                                    "expected-space-or-right-bracket-in-doctype", "datavars":
+                                    {"data": data}})
             self.currentToken["correct"] = False
             self.state = self.bogusDoctypeState
 
@@ -1425,12 +1423,12 @@ def afterDoctypePublicKeywordState(self):
             self.state = self.beforeDoctypePublicIdentifierState
         elif data in ("'", '"'):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.stream.unget(data)
             self.state = self.beforeDoctypePublicIdentifierState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1451,19 +1449,19 @@ def beforeDoctypePublicIdentifierState(self):
             self.state = self.doctypePublicIdentifierSingleQuotedState
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-end-of-doctype"})
+                                    "unexpected-end-of-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["correct"] = False
             self.state = self.bogusDoctypeState
         return True
@@ -1478,13 +1476,13 @@ def doctypePublicIdentifierDoubleQuotedState(self):
             self.currentToken["publicId"] += "\uFFFD"
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-end-of-doctype"})
+                                    "unexpected-end-of-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1502,13 +1500,13 @@ def doctypePublicIdentifierSingleQuotedState(self):
             self.currentToken["publicId"] += "\uFFFD"
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-end-of-doctype"})
+                                    "unexpected-end-of-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1525,23 +1523,23 @@ def afterDoctypePublicIdentifierState(self):
             self.state = self.dataState
         elif data == '"':
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["systemId"] = ""
             self.state = self.doctypeSystemIdentifierDoubleQuotedState
         elif data == "'":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["systemId"] = ""
             self.state = self.doctypeSystemIdentifierSingleQuotedState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["correct"] = False
             self.state = self.bogusDoctypeState
         return True
@@ -1561,13 +1559,13 @@ def betweenDoctypePublicAndSystemIdentifiersState(self):
             self.state = self.doctypeSystemIdentifierSingleQuotedState
         elif data == EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["correct"] = False
             self.state = self.bogusDoctypeState
         return True
@@ -1578,12 +1576,12 @@ def afterDoctypeSystemKeywordState(self):
             self.state = self.beforeDoctypeSystemIdentifierState
         elif data in ("'", '"'):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.stream.unget(data)
             self.state = self.beforeDoctypeSystemIdentifierState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1604,19 +1602,19 @@ def beforeDoctypeSystemIdentifierState(self):
             self.state = self.doctypeSystemIdentifierSingleQuotedState
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.currentToken["correct"] = False
             self.state = self.bogusDoctypeState
         return True
@@ -1631,13 +1629,13 @@ def doctypeSystemIdentifierDoubleQuotedState(self):
             self.currentToken["systemId"] += "\uFFFD"
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-end-of-doctype"})
+                                    "unexpected-end-of-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1655,13 +1653,13 @@ def doctypeSystemIdentifierSingleQuotedState(self):
             self.currentToken["systemId"] += "\uFFFD"
         elif data == ">":
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-end-of-doctype"})
+                                    "unexpected-end-of-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
@@ -1678,13 +1676,13 @@ def afterDoctypeSystemIdentifierState(self):
             self.state = self.dataState
         elif data is EOF:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "eof-in-doctype"})
+                                    "eof-in-doctype"})
             self.currentToken["correct"] = False
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
-              "unexpected-char-in-doctype"})
+                                    "unexpected-char-in-doctype"})
             self.state = self.bogusDoctypeState
         return True
 
@@ -1719,7 +1717,7 @@ def cdataSectionState(self):
                     data.append(char)
 
         data = "".join(data)
-        #Deal with null here rather than in the parser
+        # Deal with null here rather than in the parser
         nullCount = data.count("\u0000")
         if nullCount > 0:
             for i in range(nullCount):
diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py
index e44e9914..393309fd 100755
--- a/html5lib/treebuilders/__init__.py
+++ b/html5lib/treebuilders/__init__.py
@@ -34,6 +34,7 @@
 
 treeBuilderCache = {}
 
+
 def getTreeBuilder(treeType, implementation=None, **kwargs):
     """Get a TreeBuilder class for various types of tree with built-in support
 
@@ -59,7 +60,7 @@ def getTreeBuilder(treeType, implementation=None, **kwargs):
         if treeType == "dom":
             from . import dom
             # XXX: Keep backwards compatibility by using minidom if no implementation is given
-            if implementation == None:
+            if implementation is None:
                 from xml.dom import minidom
                 implementation = minidom
             # XXX: NEVER cache here, caching is done in the dom submodule
@@ -72,7 +73,7 @@ def getTreeBuilder(treeType, implementation=None, **kwargs):
             treeBuilderCache[treeType] = etree_lxml.TreeBuilder
         elif treeType == "etree":
             # Come up with a sane default
-            if implementation == None:
+            if implementation is None:
                 try:
                     import xml.etree.cElementTree as ET
                 except ImportError:
@@ -88,5 +89,5 @@ def getTreeBuilder(treeType, implementation=None, **kwargs):
             # NEVER cache here, caching is done in the etree submodule
             return etree.getETreeModule(implementation, **kwargs).TreeBuilder
         else:
-            raise ValueError("""Unrecognised treebuilder "%s" """%treeType)
+            raise ValueError("""Unrecognised treebuilder "%s" """ % treeType)
     return treeBuilderCache.get(treeType)
diff --git a/html5lib/treebuilders/_base.py b/html5lib/treebuilders/_base.py
index 9da17791..90846e77 100755
--- a/html5lib/treebuilders/_base.py
+++ b/html5lib/treebuilders/_base.py
@@ -9,15 +9,15 @@
 Marker = None
 
 listElementsMap = {
-    None:(frozenset(scopingElements), False),
-    "button":(frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
-    "list":(frozenset(scopingElements | set([(namespaces["html"], "ol"),
-                                   (namespaces["html"], "ul")])), False),
-    "table":(frozenset([(namespaces["html"], "html"),
-                  (namespaces["html"], "table")]), False),
-    "select":(frozenset([(namespaces["html"], "optgroup"),
-                   (namespaces["html"], "option")]), True)
-    }
+    None: (frozenset(scopingElements), False),
+    "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
+    "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"),
+                                              (namespaces["html"], "ul")])), False),
+    "table": (frozenset([(namespaces["html"], "html"),
+                         (namespaces["html"], "table")]), False),
+    "select": (frozenset([(namespaces["html"], "optgroup"),
+                          (namespaces["html"], "option")]), True)
+}
 
 
 class Node(object):
@@ -40,13 +40,13 @@ def __init__(self, name):
         self._flags = []
 
     def __str__(self):
-        attributesStr =  " ".join(["%s=\"%s\""%(name, value)
-                                   for name, value in
-                                   self.attributes.items()])
+        attributesStr = " ".join(["%s=\"%s\"" % (name, value)
+                                  for name, value in
+                                  self.attributes.items()])
         if attributesStr:
-            return "<%s %s>"%(self.name,attributesStr)
+            return "<%s %s>" % (self.name, attributesStr)
         else:
-            return "<%s>"%(self.name)
+            return "<%s>" % (self.name)
 
     def __repr__(self):
         return "<%s>" % (self.name)
@@ -78,7 +78,7 @@ def reparentChildren(self, newParent):
         This is needed so that trees that don't store text as nodes move the
         text in the correct way
         """
-        #XXX - should this method be made more general?
+        # XXX - should this method be made more general?
         for child in self.childNodes:
             newParent.appendChild(child)
         self.childNodes = []
@@ -89,12 +89,12 @@ def cloneNode(self):
         """
         raise NotImplementedError
 
-
     def hasContent(self):
         """Return true if the node has children or text, false otherwise
         """
         raise NotImplementedError
 
+
 class ActiveFormattingElements(list):
     def append(self, node):
         equalCount = 0
@@ -118,6 +118,7 @@ def nodesEqual(self, node1, node2):
 
         return True
 
+
 class TreeBuilder(object):
     """Base treebuilder implementation
     documentClass - the class to use for the bottommost node of a document
@@ -126,19 +127,19 @@ class TreeBuilder(object):
     doctypeClass - the class to use for doctypes
     """
 
-    #Document class
+    # Document class
     documentClass = None
 
-    #The class to use for creating a node
+    # The class to use for creating a node
     elementClass = None
 
-    #The class to use for creating comments
+    # The class to use for creating comments
     commentClass = None
 
-    #The class to use for creating doctypes
+    # The class to use for creating doctypes
     doctypeClass = None
 
-    #Fragment class
+    # Fragment class
     fragmentClass = None
 
     def __init__(self, namespaceHTMLElements):
@@ -152,7 +153,7 @@ def reset(self):
         self.openElements = []
         self.activeFormattingElements = ActiveFormattingElements()
 
-        #XXX - rename these to headElement, formElement
+        # XXX - rename these to headElement, formElement
         self.headPointer = None
         self.formPointer = None
 
@@ -162,20 +163,20 @@ def reset(self):
 
     def elementInScope(self, target, variant=None):
 
-        #If we pass a node in we match that. if we pass a string
-        #match any node with that name
+        # If we pass a node in, we match that node. If we pass a string,
+        # we match any node with that name.
         exactNode = hasattr(target, "nameTuple")
 
         listElements, invert = listElementsMap[variant]
 
         for node in reversed(self.openElements):
             if (node.name == target and not exactNode or
-                node == target and exactNode):
+                    node == target and exactNode):
                 return True
             elif (invert ^ (node.nameTuple in listElements)):
                 return False
 
-        assert False # We should never reach this point
+        assert False  # We should never reach this point
 
     def reconstructActiveFormattingElements(self):
         # Within this algorithm the order of steps described in the
@@ -195,7 +196,7 @@ def reconstructActiveFormattingElements(self):
         # Step 6
         while entry != Marker and entry not in self.openElements:
             if i == 0:
-                #This will be reset to 0 below
+                # This will be reset to 0 below
                 i = -1
                 break
             i -= 1
@@ -208,13 +209,13 @@ def reconstructActiveFormattingElements(self):
 
             # Step 8
             entry = self.activeFormattingElements[i]
-            clone = entry.cloneNode() #Mainly to get a new copy of the attributes
+            clone = entry.cloneNode()  # Mainly to get a new copy of the attributes
 
             # Step 9
-            element = self.insertElement({"type":"StartTag",
-                                          "name":clone.name,
-                                          "namespace":clone.namespace,
-                                          "data":clone.attributes})
+            element = self.insertElement({"type": "StartTag",
+                                          "name": clone.name,
+                                          "namespace": clone.namespace,
+                                          "data": clone.attributes})
 
             # Step 10
             self.activeFormattingElements[i] = element
@@ -284,7 +285,7 @@ def _setInsertFromTable(self, value):
 
     def insertElementNormal(self, token):
         name = token["name"]
-        assert isinstance(name, text_type), "Element %s not unicode"%name
+        assert isinstance(name, text_type), "Element %s not unicode" % name
         namespace = token.get("namespace", self.defaultNamespace)
         element = self.elementClass(name, namespace)
         element.attributes = token["data"]
@@ -298,8 +299,8 @@ def insertElementTable(self, token):
         if self.openElements[-1].name not in tableInsertModeElements:
             return self.insertElementNormal(token)
         else:
-            #We should be in the InTable mode. This means we want to do
-            #special magic element rearranging
+            # We should be in the InTable mode. This means we want to do
+            # special magic element rearranging
             parent, insertBefore = self.getTableMisnestedNodePosition()
             if insertBefore is None:
                 parent.appendChild(element)
@@ -329,7 +330,7 @@ def getTableMisnestedNodePosition(self):
         # The foster parent element is the one which comes before the most
         # recently opened table element
         # XXX - this is really inelegant
-        lastTable=None
+        lastTable = None
         fosterParent = None
         insertBefore = None
         for elm in self.openElements[::-1]:
@@ -353,7 +354,7 @@ def generateImpliedEndTags(self, exclude=None):
         name = self.openElements[-1].name
         # XXX td, th and tr are not actually needed
         if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
-            and name != exclude):
+                and name != exclude):
             self.openElements.pop()
             # XXX This is not entirely what the specification says. We should
             # investigate it more closely.
@@ -365,7 +366,7 @@ def getDocument(self):
 
     def getFragment(self):
         "Return the final fragment"
-        #assert self.innerHTML
+        # assert self.innerHTML
         fragment = self.fragmentClass()
         self.openElements[0].reparentChildren(fragment)
         return fragment
diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
index f48a53fe..55f34f3f 100644
--- a/html5lib/treebuilders/dom.py
+++ b/html5lib/treebuilders/dom.py
@@ -9,22 +9,30 @@
 from html5lib.constants import namespaces
 from html5lib.utils import moduleFactoryFactory
 
+
 def getDomBuilder(DomImplementation):
     Dom = DomImplementation
+
     class AttrList(object):
         def __init__(self, element):
             self.element = element
+
         def __iter__(self):
             return list(self.element.attributes.items()).__iter__()
+
         def __setitem__(self, name, value):
             self.element.setAttribute(name, value)
+
         def __len__(self):
             return len(list(self.element.attributes.items()))
+
         def items(self):
             return [(item[0], item[1]) for item in
-                     list(self.element.attributes.items())]
+                    list(self.element.attributes.items())]
+
         def keys(self):
             return list(self.element.attributes.keys())
+
         def __getitem__(self, name):
             return self.element.getAttribute(name)
 
@@ -39,7 +47,7 @@ def __init__(self, element):
             _base.Node.__init__(self, element.nodeName)
             self.element = element
 
-        namespace = property(lambda self:hasattr(self.element, "namespaceURI")
+        namespace = property(lambda self: hasattr(self.element, "namespaceURI")
                              and self.element.namespaceURI or None)
 
         def appendChild(self, node):
@@ -94,7 +102,7 @@ def hasContent(self):
             return self.element.hasChildNodes()
 
         def getNameTuple(self):
-            if self.namespace == None:
+            if self.namespace is None:
                 return namespaces["html"], self.name
             else:
                 return self.namespace, self.name
@@ -103,7 +111,7 @@ def getNameTuple(self):
 
     class TreeBuilder(_base.TreeBuilder):
         def documentClass(self):
-            self.dom = Dom.getDOMImplementation().createDocument(None,None,None)
+            self.dom = Dom.getDOMImplementation().createDocument(None, None, None)
             return weakref.proxy(self)
 
         def insertDoctype(self, token):
@@ -144,14 +152,14 @@ def getFragment(self):
             return _base.TreeBuilder.getFragment(self).element
 
         def insertText(self, data, parent=None):
-            data=data
+            data = data
             if parent != self:
                 _base.TreeBuilder.insertText(self, data, parent)
             else:
                 # HACK: allow text nodes as children of the document node
                 if hasattr(self.dom, '_child_node_types'):
                     if not Node.TEXT_NODE in self.dom._child_node_types:
-                        self.dom._child_node_types=list(self.dom._child_node_types)
+                        self.dom._child_node_types = list(self.dom._child_node_types)
                         self.dom._child_node_types.append(Node.TEXT_NODE)
                 self.dom.appendChild(self.dom.createTextNode(data))
 
@@ -160,34 +168,35 @@ def insertText(self, data, parent=None):
     def testSerializer(element):
         element.normalize()
         rv = []
+
         def serializeElement(element, indent=0):
             if element.nodeType == Node.DOCUMENT_TYPE_NODE:
                 if element.name:
                     if element.publicId or element.systemId:
                         publicId = element.publicId or ""
                         systemId = element.systemId or ""
-                        rv.append( """|%s<!DOCTYPE %s "%s" "%s">"""%(
-                                ' '*indent, element.name, publicId, systemId))
+                        rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" %
+                                  (' ' * indent, element.name, publicId, systemId))
                     else:
-                        rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.name))
+                        rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name))
                 else:
-                    rv.append("|%s<!DOCTYPE >"%(' '*indent,))
+                    rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
             elif element.nodeType == Node.DOCUMENT_NODE:
                 rv.append("#document")
             elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
                 rv.append("#document-fragment")
             elif element.nodeType == Node.COMMENT_NODE:
-                rv.append("|%s<!-- %s -->"%(' '*indent, element.nodeValue))
+                rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue))
             elif element.nodeType == Node.TEXT_NODE:
-                rv.append("|%s\"%s\"" %(' '*indent, element.nodeValue))
+                rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue))
             else:
                 if (hasattr(element, "namespaceURI") and
-                    element.namespaceURI != None):
-                    name = "%s %s"%(constants.prefixes[element.namespaceURI],
-                                    element.nodeName)
+                        element.namespaceURI is not None):
+                    name = "%s %s" % (constants.prefixes[element.namespaceURI],
+                                      element.nodeName)
                 else:
                     name = element.nodeName
-                rv.append("|%s<%s>"%(' '*indent, name))
+                rv.append("|%s<%s>" % (' ' * indent, name))
                 if element.hasAttributes():
                     attributes = []
                     for i in range(len(element.attributes)):
@@ -196,13 +205,13 @@ def serializeElement(element, indent=0):
                         value = attr.value
                         ns = attr.namespaceURI
                         if ns:
-                            name = "%s %s"%(constants.prefixes[ns], attr.localName)
+                            name = "%s %s" % (constants.prefixes[ns], attr.localName)
                         else:
                             name = attr.nodeName
                         attributes.append((name, value))
 
                     for name, value in sorted(attributes):
-                        rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
+                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
             indent += 2
             for child in element.childNodes:
                 serializeElement(child, indent)
@@ -210,63 +219,68 @@ def serializeElement(element, indent=0):
 
         return "\n".join(rv)
 
-    def dom2sax(node, handler, nsmap={'xml':XML_NAMESPACE}):
-      if node.nodeType == Node.ELEMENT_NODE:
-        if not nsmap:
-          handler.startElement(node.nodeName, node.attributes)
-          for child in node.childNodes: dom2sax(child, handler, nsmap)
-          handler.endElement(node.nodeName)
+    def dom2sax(node, handler, nsmap={'xml': XML_NAMESPACE}):
+        if node.nodeType == Node.ELEMENT_NODE:
+            if not nsmap:
+                handler.startElement(node.nodeName, node.attributes)
+                for child in node.childNodes:
+                    dom2sax(child, handler, nsmap)
+                handler.endElement(node.nodeName)
+            else:
+                attributes = dict(node.attributes.itemsNS())
+
+                # gather namespace declarations
+                prefixes = []
+                for attrname in list(node.attributes.keys()):
+                    attr = node.getAttributeNode(attrname)
+                    if (attr.namespaceURI == XMLNS_NAMESPACE or
+                       (attr.namespaceURI is None and attr.nodeName.startswith('xmlns'))):
+                        prefix = (attr.nodeName != 'xmlns' and attr.nodeName or None)
+                        handler.startPrefixMapping(prefix, attr.nodeValue)
+                        prefixes.append(prefix)
+                        nsmap = nsmap.copy()
+                        nsmap[prefix] = attr.nodeValue
+                        del attributes[(attr.namespaceURI, attr.nodeName)]
+
+                # apply namespace declarations
+                for attrname in list(node.attributes.keys()):
+                    attr = node.getAttributeNode(attrname)
+                    if attr.namespaceURI is None and ':' in attr.nodeName:
+                        prefix = attr.nodeName.split(':')[0]
+                        if prefix in nsmap:
+                            del attributes[(attr.namespaceURI, attr.nodeName)]
+                            attributes[(nsmap[prefix], attr.nodeName)] = attr.nodeValue
+
+                # SAX events
+                ns = node.namespaceURI or nsmap.get(None, None)
+                handler.startElementNS((ns, node.nodeName), node.nodeName, attributes)
+                for child in node.childNodes:
+                    dom2sax(child, handler, nsmap)
+                handler.endElementNS((ns, node.nodeName), node.nodeName)
+                for prefix in prefixes:
+                    handler.endPrefixMapping(prefix)
+
+        elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]:
+            handler.characters(node.nodeValue)
+
+        elif node.nodeType == Node.DOCUMENT_NODE:
+            handler.startDocument()
+            for child in node.childNodes:
+                dom2sax(child, handler, nsmap)
+            handler.endDocument()
+
+        elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
+            for child in node.childNodes:
+                dom2sax(child, handler, nsmap)
+
         else:
-          attributes = dict(node.attributes.itemsNS())
-
-          # gather namespace declarations
-          prefixes = []
-          for attrname in list(node.attributes.keys()):
-            attr = node.getAttributeNode(attrname)
-            if (attr.namespaceURI == XMLNS_NAMESPACE or
-               (attr.namespaceURI == None and attr.nodeName.startswith('xmlns'))):
-              prefix = (attr.nodeName != 'xmlns' and attr.nodeName or None)
-              handler.startPrefixMapping(prefix, attr.nodeValue)
-              prefixes.append(prefix)
-              nsmap = nsmap.copy()
-              nsmap[prefix] = attr.nodeValue
-              del attributes[(attr.namespaceURI, attr.nodeName)]
-
-          # apply namespace declarations
-          for attrname in list(node.attributes.keys()):
-            attr = node.getAttributeNode(attrname)
-            if attr.namespaceURI == None and ':' in attr.nodeName:
-              prefix = attr.nodeName.split(':')[0]
-              if prefix in nsmap:
-                del attributes[(attr.namespaceURI, attr.nodeName)]
-                attributes[(nsmap[prefix],attr.nodeName)]=attr.nodeValue
-
-          # SAX events
-          ns = node.namespaceURI or nsmap.get(None,None)
-          handler.startElementNS((ns,node.nodeName), node.nodeName, attributes)
-          for child in node.childNodes: dom2sax(child, handler, nsmap)
-          handler.endElementNS((ns, node.nodeName), node.nodeName)
-          for prefix in prefixes: handler.endPrefixMapping(prefix)
-
-      elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]:
-        handler.characters(node.nodeValue)
-
-      elif node.nodeType == Node.DOCUMENT_NODE:
-        handler.startDocument()
-        for child in node.childNodes: dom2sax(child, handler, nsmap)
-        handler.endDocument()
-
-      elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
-        for child in node.childNodes: dom2sax(child, handler, nsmap)
-
-      else:
-        # ATTRIBUTE_NODE
-        # ENTITY_NODE
-        # PROCESSING_INSTRUCTION_NODE
-        # COMMENT_NODE
-        # DOCUMENT_TYPE_NODE
-        # NOTATION_NODE
-        pass
+            # ATTRIBUTE_NODE
+            # ENTITY_NODE
+            # PROCESSING_INSTRUCTION_NODE
+            # COMMENT_NODE
+            # DOCUMENT_TYPE_NODE
+            # NOTATION_NODE
+            pass
 
     return locals()
 
@@ -278,4 +292,4 @@ def dom2sax(node, handler, nsmap={'xml':XML_NAMESPACE}):
 # Keep backwards compatibility with things that directly load
 # classes/functions from this module
 for key, value in list(getDomModule(minidom).__dict__.items()):
-	globals()[key] = value
+    globals()[key] = value
diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
index 8dc9c86b..ed2311c4 100755
--- a/html5lib/treebuilders/etree.py
+++ b/html5lib/treebuilders/etree.py
@@ -11,9 +11,11 @@
 
 tag_regexp = re.compile("{([^}]*)}(.*)")
 
+
 def getETreeBuilder(ElementTreeImplementation, fullTree=False):
     ElementTree = ElementTreeImplementation
     ElementTreeCommentType = ElementTree.Comment("asd").tag
+
     class Element(_base.Node):
         def __init__(self, name, namespace=None):
             self._name = name
@@ -32,7 +34,7 @@ def _getETreeTag(self, name, namespace):
             if namespace is None:
                 etree_tag = name
             else:
-                etree_tag = "{%s}%s"%(namespace, name)
+                etree_tag = "{%s}%s" % (namespace, name)
             return etree_tag
 
         def _setName(self, name):
@@ -57,13 +59,13 @@ def _getAttributes(self):
             return self._element.attrib
 
         def _setAttributes(self, attributes):
-            #Delete existing attributes first
-            #XXX - there may be a better way to do this...
+            # Delete existing attributes first
+            # XXX - there may be a better way to do this...
             for key in list(self._element.attrib.keys()):
                 del self._element.attrib[key]
             for key, value in attributes.items():
                 if isinstance(key, tuple):
-                    name = "{%s}%s"%(key[2], key[1])
+                    name = "{%s}%s" % (key[2], key[1])
                 else:
                     name = key
                 self._element.set(name, value)
@@ -72,6 +74,7 @@ def _setAttributes(self, attributes):
 
         def _getChildNodes(self):
             return self._childNodes
+
         def _setChildNodes(self, value):
             del self._element[:]
             self._childNodes = []
@@ -96,7 +99,7 @@ def insertBefore(self, node, refNode):
 
         def removeChild(self, node):
             self._element.remove(node._element)
-            node.parent=None
+            node.parent = None
 
         def insertText(self, data, insertBefore=None):
             if not(len(self._element)):
@@ -104,18 +107,18 @@ def insertText(self, data, insertBefore=None):
                     self._element.text = ""
                 self._element.text += data
             elif insertBefore is None:
-                #Insert the text as the tail of the last child element
+                # Insert the text as the tail of the last child element
                 if not self._element[-1].tail:
                     self._element[-1].tail = ""
                 self._element[-1].tail += data
             else:
-                #Insert the text before the specified node
+                # Insert the text before the specified node
                 children = list(self._element)
                 index = children.index(insertBefore._element)
                 if index > 0:
-                    if not self._element[index-1].tail:
-                        self._element[index-1].tail = ""
-                    self._element[index-1].tail += data
+                    if not self._element[index - 1].tail:
+                        self._element[index - 1].tail = ""
+                    self._element[index - 1].tail += data
                 else:
                     if not self._element.text:
                         self._element.text = ""
@@ -140,8 +143,8 @@ def reparentChildren(self, newParent):
 
     class Comment(Element):
         def __init__(self, data):
-            #Use the superclass constructor to set all properties on the
-            #wrapper element
+            # Use the superclass constructor to set all properties on the
+            # wrapper element
             self._element = ElementTree.Comment(data)
             self.parent = None
             self._childNodes = []
@@ -190,6 +193,7 @@ def __init__(self):
 
     def testSerializer(element):
         rv = []
+
         def serializeElement(element, indent=0):
             if not(hasattr(element, "tag")):
                 element = element.getroot()
@@ -197,18 +201,19 @@ def serializeElement(element, indent=0):
                 if element.get("publicId") or element.get("systemId"):
                     publicId = element.get("publicId") or ""
                     systemId = element.get("systemId") or ""
-                    rv.append( """<!DOCTYPE %s "%s" "%s">"""%(
-                            element.text, publicId, systemId))
+                    rv.append("""<!DOCTYPE %s "%s" "%s">""" %
+                              (element.text, publicId, systemId))
                 else:
-                    rv.append("<!DOCTYPE %s>"%(element.text,))
+                    rv.append("<!DOCTYPE %s>" % (element.text,))
             elif element.tag == "DOCUMENT_ROOT":
                 rv.append("#document")
                 assert element.text is None
                 assert element.tail is None
             elif element.tag == ElementTreeCommentType:
-                rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
+                rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
             else:
-                assert isinstance(element.tag, text_type), "Expected unicode, got %s, %s"%(type(element.tag), element.tag)
+                assert isinstance(element.tag, text_type), \
+                    "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
                 nsmatch = tag_regexp.match(element.tag)
 
                 if nsmatch is None:
@@ -216,8 +221,8 @@ def serializeElement(element, indent=0):
                 else:
                     ns, name = nsmatch.groups()
                     prefix = constants.prefixes[ns]
-                    name = "%s %s"%(prefix, name)
-                rv.append("|%s<%s>"%(' '*indent, name))
+                    name = "%s %s" % (prefix, name)
+                rv.append("|%s<%s>" % (' ' * indent, name))
 
                 if hasattr(element, "attrib"):
                     attributes = []
@@ -226,20 +231,20 @@ def serializeElement(element, indent=0):
                         if nsmatch is not None:
                             ns, name = nsmatch.groups()
                             prefix = constants.prefixes[ns]
-                            attr_string = "%s %s"%(prefix, name)
+                            attr_string = "%s %s" % (prefix, name)
                         else:
                             attr_string = name
                         attributes.append((attr_string, value))
 
                     for name, value in sorted(attributes):
-                        rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
+                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
                 if element.text:
-                    rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
+                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
             indent += 2
             for child in element:
                 serializeElement(child, indent)
             if element.tail:
-                rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail))
+                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
         serializeElement(element, 0)
 
         return "\n".join(rv)
@@ -248,6 +253,7 @@ def tostring(element):
         """Serialize an element and its child nodes to a string"""
         rv = []
         filter = ihatexml.InfosetFilter()
+
         def serializeElement(element):
             if type(element) == type(ElementTree.ElementTree):
                 element = element.getroot()
@@ -256,10 +262,10 @@ def serializeElement(element):
                 if element.get("publicId") or element.get("systemId"):
                     publicId = element.get("publicId") or ""
                     systemId = element.get("systemId") or ""
-                    rv.append( """<!DOCTYPE %s PUBLIC "%s" "%s">"""%(
-                            element.text, publicId, systemId))
+                    rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
+                              (element.text, publicId, systemId))
                 else:
-                    rv.append("<!DOCTYPE %s>"%(element.text,))
+                    rv.append("<!DOCTYPE %s>" % (element.text,))
             elif element.tag == "DOCUMENT_ROOT":
                 assert element.text is None
                 assert element.tail is None
@@ -268,23 +274,23 @@ def serializeElement(element):
                     serializeElement(child)
 
             elif element.tag == ElementTreeCommentType:
-                rv.append("<!--%s-->"%(element.text,))
+                rv.append("<!--%s-->" % (element.text,))
             else:
-                #This is assumed to be an ordinary element
+                # This is assumed to be an ordinary element
                 if not element.attrib:
-                    rv.append("<%s>"%(filter.fromXmlName(element.tag),))
+                    rv.append("<%s>" % (filter.fromXmlName(element.tag),))
                 else:
-                    attr = " ".join(["%s=\"%s\""%(
-                                filter.fromXmlName(name), value)
-                                     for name, value in element.attrib.items()])
-                    rv.append("<%s %s>"%(element.tag, attr))
+                    attr = " ".join(["%s=\"%s\"" % (
+                        filter.fromXmlName(name), value)
+                        for name, value in element.attrib.items()])
+                    rv.append("<%s %s>" % (element.tag, attr))
                 if element.text:
                     rv.append(element.text)
 
                 for child in element:
                     serializeElement(child)
 
-                rv.append("</%s>"%(element.tag,))
+                rv.append("</%s>" % (element.tag,))
 
             if element.tail:
                 rv.append(element.tail)
@@ -309,7 +315,7 @@ def getDocument(self):
             else:
                 if self.defaultNamespace is not None:
                     return self.document._element.find(
-                        "{%s}html"%self.defaultNamespace)
+                        "{%s}html" % self.defaultNamespace)
                 else:
                     return self.document._element.find("html")
 
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index bce09747..4ca894b7 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -36,6 +36,7 @@ def __init__(self, name, publicId, systemId):
         self.publicId = publicId
         self.systemId = systemId
 
+
 class Document(object):
     def __init__(self):
         self._elementTree = None
@@ -49,44 +50,46 @@ def _getChildNodes(self):
 
     childNodes = property(_getChildNodes)
 
+
 def testSerializer(element):
     rv = []
     finalText = None
     infosetFilter = ihatexml.InfosetFilter()
+
     def serializeElement(element, indent=0):
         if not hasattr(element, "tag"):
-            if  hasattr(element, "getroot"):
-                #Full tree case
+            if hasattr(element, "getroot"):
+                # Full tree case
                 rv.append("#document")
                 if element.docinfo.internalDTD:
                     if not (element.docinfo.public_id or
                             element.docinfo.system_url):
-                        dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name
+                        dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                     else:
-                        dtd_str = """<!DOCTYPE %s "%s" "%s">"""%(
+                        dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
                             element.docinfo.root_name,
                             element.docinfo.public_id,
                             element.docinfo.system_url)
-                    rv.append("|%s%s"%(' '*(indent+2), dtd_str))
+                    rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
                 next_element = element.getroot()
                 while next_element.getprevious() is not None:
                     next_element = next_element.getprevious()
                 while next_element is not None:
-                    serializeElement(next_element, indent+2)
+                    serializeElement(next_element, indent + 2)
                     next_element = next_element.getnext()
             elif isinstance(element, str) or isinstance(element, bytes):
-                #Text in a fragment
+                # Text in a fragment
                 assert isinstance(element, str) or sys.version_info.major == 2
-                rv.append("|%s\"%s\""%(' '*indent, element))
+                rv.append("|%s\"%s\"" % (' ' * indent, element))
             else:
-                #Fragment case
+                # Fragment case
                 rv.append("#document-fragment")
                 for next_element in element:
-                    serializeElement(next_element, indent+2)
+                    serializeElement(next_element, indent + 2)
         elif type(element.tag) == type(etree.Comment):
-            rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
+            rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
             if hasattr(element, "tail") and element.tail:
-                rv.append("|%s\"%s\"" %(' '*indent, element.tail))
+                rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
         else:
             assert isinstance(element, etree._Element)
             nsmatch = etree_builders.tag_regexp.match(element.tag)
@@ -94,11 +97,11 @@ def serializeElement(element, indent=0):
                 ns = nsmatch.group(1)
                 tag = nsmatch.group(2)
                 prefix = constants.prefixes[ns]
-                rv.append("|%s<%s %s>"%(' '*indent, prefix,
-                                        infosetFilter.fromXmlName(tag)))
+                rv.append("|%s<%s %s>" % (' ' * indent, prefix,
+                                          infosetFilter.fromXmlName(tag)))
             else:
-                rv.append("|%s<%s>"%(' '*indent,
-                                     infosetFilter.fromXmlName(element.tag)))
+                rv.append("|%s<%s>" % (' ' * indent,
+                                       infosetFilter.fromXmlName(element.tag)))
 
             if hasattr(element, "attrib"):
                 attributes = []
@@ -108,60 +111,62 @@ def serializeElement(element, indent=0):
                         ns, name = nsmatch.groups()
                         name = infosetFilter.fromXmlName(name)
                         prefix = constants.prefixes[ns]
-                        attr_string = "%s %s"%(prefix, name)
+                        attr_string = "%s %s" % (prefix, name)
                     else:
                         attr_string = infosetFilter.fromXmlName(name)
                     attributes.append((attr_string, value))
 
                 for name, value in sorted(attributes):
-                    rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
+                    rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
 
             if element.text:
-                rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
+                rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
             indent += 2
             for child in element.getchildren():
                 serializeElement(child, indent)
             if hasattr(element, "tail") and element.tail:
-                rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail))
+                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
     serializeElement(element, 0)
 
     if finalText is not None:
-        rv.append("|%s\"%s\""%(' '*2, finalText))
+        rv.append("|%s\"%s\"" % (' ' * 2, finalText))
 
     return "\n".join(rv)
 
+
 def tostring(element):
     """Serialize an element and its child nodes to a string"""
     rv = []
     finalText = None
+
     def serializeElement(element):
         if not hasattr(element, "tag"):
             if element.docinfo.internalDTD:
                 if element.docinfo.doctype:
                     dtd_str = element.docinfo.doctype
                 else:
-                    dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name
+                    dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                 rv.append(dtd_str)
             serializeElement(element.getroot())
 
         elif type(element.tag) == type(etree.Comment):
-            rv.append("<!--%s-->"%(element.text,))
+            rv.append("<!--%s-->" % (element.text,))
 
         else:
-            #This is assumed to be an ordinary element
+            # This is assumed to be an ordinary element
             if not element.attrib:
-                rv.append("<%s>"%(element.tag,))
+                rv.append("<%s>" % (element.tag,))
             else:
-                attr = " ".join(["%s=\"%s\""%(name, value)
+                attr = " ".join(["%s=\"%s\"" % (name, value)
                                  for name, value in element.attrib.items()])
-                rv.append("<%s %s>"%(element.tag, attr))
+                rv.append("<%s %s>" % (element.tag, attr))
             if element.text:
                 rv.append(element.text)
 
             for child in element.getchildren():
                 serializeElement(child)
 
-            rv.append("</%s>"%(element.tag,))
+            rv.append("</%s>" % (element.tag,))
 
         if hasattr(element, "tail") and element.tail:
             rv.append(element.tail)
@@ -169,7 +174,7 @@ def serializeElement(element):
     serializeElement(element)
 
     if finalText is not None:
-        rv.append("%s\""%(' '*2, finalText))
+        rv.append("%s\"" % (' ' * 2, finalText))
 
     return "".join(rv)
 
@@ -181,7 +186,7 @@ class TreeBuilder(_base.TreeBuilder):
     commentClass = None
     fragmentClass = Document
 
-    def __init__(self, namespaceHTMLElements, fullTree = False):
+    def __init__(self, namespaceHTMLElements, fullTree=False):
         builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
         infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
         self.namespaceHTMLElements = namespaceHTMLElements
@@ -192,7 +197,7 @@ def __init__(self, element, value={}):
                 dict.__init__(self, value)
                 for key, value in self.items():
                     if isinstance(key, tuple):
-                        name = "{%s}%s"%(key[2], infosetFilter.coerceAttribute(key[1]))
+                        name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
                     else:
                         name = infosetFilter.coerceAttribute(key)
                     self._element._element.attrib[name] = value
@@ -200,7 +205,7 @@ def __init__(self, element, value={}):
             def __setitem__(self, key, value):
                 dict.__setitem__(self, key, value)
                 if isinstance(key, tuple):
-                    name = "{%s}%s"%(key[2], infosetFilter.coerceAttribute(key[1]))
+                    name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
                 else:
                     name = infosetFilter.coerceAttribute(key)
                 self._element._element.attrib[name] = value
@@ -236,7 +241,6 @@ def insertText(self, data, insertBefore=None):
             def appendChild(self, child):
                 builder.Element.appendChild(self, child)
 
-
         class Comment(builder.Comment):
             def __init__(self, data):
                 data = infosetFilter.coerceComment(data)
@@ -253,7 +257,7 @@ def _getData(self):
 
         self.elementClass = Element
         self.commentClass = builder.Comment
-        #self.fragmentClass = builder.DocumentFragment
+        # self.fragmentClass = builder.DocumentFragment
         _base.TreeBuilder.__init__(self, namespaceHTMLElements)
 
     def reset(self):
@@ -297,23 +301,23 @@ def insertCommentInitial(self, data, parent=None):
 
     def insertCommentMain(self, data, parent=None):
         if (parent == self.document and
-            type(self.document._elementTree.getroot()[-1].tag) == type(etree.Comment)):
+                type(self.document._elementTree.getroot()[-1].tag) == type(etree.Comment)):
                 warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
         super(TreeBuilder, self).insertComment(data, parent)
 
     def insertRoot(self, token):
         """Create the document root"""
-        #Because of the way libxml2 works, it doesn't seem to be possible to
-        #alter information like the doctype after the tree has been parsed.
-        #Therefore we need to use the built-in parser to create our iniial
-        #tree, after which we can add elements like normal
+        # Because of the way libxml2 works, it doesn't seem to be possible to
+        # alter information like the doctype after the tree has been parsed.
+        # Therefore we need to use the built-in parser to create our iniial
+        # tree, after which we can add elements like normal
         docStr = ""
         if self.doctype and self.doctype.name and not self.doctype.name.startswith('"'):
-            docStr += "<!DOCTYPE %s"%self.doctype.name
+            docStr += "<!DOCTYPE %s" % self.doctype.name
             if (self.doctype.publicId is not None or
-                self.doctype.systemId is not None):
-                docStr += ' PUBLIC "%s" "%s"'%(self.doctype.publicId or "",
-                                               self.doctype.systemId or "")
+                    self.doctype.systemId is not None):
+                docStr += ' PUBLIC "%s" "%s"' % (self.doctype.publicId or "",
+                                                 self.doctype.systemId or "")
             docStr += ">"
             if self.doctype.name != token["name"]:
                 warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
@@ -325,11 +329,11 @@ def insertRoot(self, token):
             print(docStr)
             raise
 
-        #Append the initial comments:
+        # Append the initial comments:
         for comment_token in self.initial_comments:
             root.addprevious(etree.Comment(comment_token["data"]))
 
-        #Create the root document and add the ElementTree to it
+        # Create the root document and add the ElementTree to it
         self.document = self.documentClass()
         self.document._elementTree = root.getroottree()
 
@@ -339,14 +343,14 @@ def insertRoot(self, token):
         if namespace is None:
             etree_tag = name
         else:
-            etree_tag = "{%s}%s"%(namespace, name)
+            etree_tag = "{%s}%s" % (namespace, name)
         root.tag = etree_tag
 
-        #Add the root element to the internal child/open data structures
+        # Add the root element to the internal child/open data structures
         root_element = self.elementClass(name, namespace)
         root_element._element = root
         self.document._childNodes.append(root_element)
         self.openElements.append(root_element)
 
-        #Reset to the default insert comment function
+        # Reset to the default insert comment function
         self.insertComment = self.insertCommentMain
diff --git a/html5lib/treebuilders/simpletree.py b/html5lib/treebuilders/simpletree.py
index f1bc871a..9558f3bd 100755
--- a/html5lib/treebuilders/simpletree.py
+++ b/html5lib/treebuilders/simpletree.py
@@ -6,8 +6,11 @@
 from xml.sax.saxutils import escape
 
 # Really crappy basic implementation of a DOM-core like thing
+
+
 class Node(_base.Node):
     type = -1
+
     def __init__(self, name):
         self.name = name
         self.parent = None
@@ -28,7 +31,7 @@ def toxml(self):
         raise NotImplementedError
 
     def printTree(self, indent=0):
-        tree = '\n|%s%s' % (' '* indent, text_type(self))
+        tree = '\n|%s%s' % (' ' * indent, text_type(self))
         for child in self.childNodes:
             tree += child.printTree(indent + 2)
         return tree
@@ -36,14 +39,14 @@ def printTree(self, indent=0):
     def appendChild(self, node):
         assert isinstance(node, Node)
         if (isinstance(node, TextNode) and self.childNodes and
-          isinstance(self.childNodes[-1], TextNode)):
+           isinstance(self.childNodes[-1], TextNode)):
             self.childNodes[-1].value += node.value
         else:
             self.childNodes.append(node)
         node.parent = self
 
     def insertText(self, data, insertBefore=None):
-        assert isinstance(data, text_type), "data %s is of type %s expected unicode"%(repr(data), type(data))
+        assert isinstance(data, text_type), "data %s is of type %s expected unicode" % (repr(data), type(data))
         if insertBefore is None:
             self.appendChild(TextNode(data))
         else:
@@ -52,7 +55,7 @@ def insertText(self, data, insertBefore=None):
     def insertBefore(self, node, refNode):
         index = self.childNodes.index(refNode)
         if (isinstance(node, TextNode) and index > 0 and
-          isinstance(self.childNodes[index - 1], TextNode)):
+           isinstance(self.childNodes[index - 1], TextNode)):
             self.childNodes[index - 1].value += node.value
         else:
             self.childNodes.insert(index, node)
@@ -74,15 +77,17 @@ def hasContent(self):
         return bool(self.childNodes)
 
     def getNameTuple(self):
-        if self.namespace == None:
+        if self.namespace is None:
             return namespaces["html"], self.name
         else:
             return self.namespace, self.name
 
     nameTuple = property(getNameTuple)
 
+
 class Document(Node):
     type = 1
+
     def __init__(self):
         Node.__init__(self, None)
 
@@ -113,16 +118,20 @@ def printTree(self):
     def cloneNode(self):
         return Document()
 
+
 class DocumentFragment(Document):
     type = 2
+
     def __str__(self):
         return "#document-fragment"
 
     def cloneNode(self):
         return DocumentFragment()
 
+
 class DocumentType(Node):
     type = 3
+
     def __init__(self, name, publicId, systemId):
         Node.__init__(self, name)
         self.publicId = publicId
@@ -132,13 +141,12 @@ def __str__(self):
         if self.publicId or self.systemId:
             publicId = self.publicId or ""
             systemId = self.systemId or ""
-            return """<!DOCTYPE %s "%s" "%s">"""%(
+            return """<!DOCTYPE %s "%s" "%s">""" % (
                 self.name, publicId, systemId)
 
         else:
             return "<!DOCTYPE %s>" % self.name
 
-
     toxml = __str__
 
     def hilite(self):
@@ -147,8 +155,10 @@ def hilite(self):
     def cloneNode(self):
         return DocumentType(self.name, self.publicId, self.systemId)
 
+
 class TextNode(Node):
     type = 4
+
     def __init__(self, value):
         Node.__init__(self, None)
         self.value = value
@@ -165,24 +175,26 @@ def cloneNode(self):
         assert isinstance(self.value, str)
         return TextNode(self.value)
 
+
 class Element(Node):
     type = 5
+
     def __init__(self, name, namespace=None):
         Node.__init__(self, name)
         self.namespace = namespace
         self.attributes = {}
 
     def __str__(self):
-        if self.namespace == None:
+        if self.namespace is None:
             return "<%s>" % self.name
         else:
-            return "<%s %s>"%(prefixes[self.namespace], self.name)
+            return "<%s %s>" % (prefixes[self.namespace], self.name)
 
     def toxml(self):
         result = '<' + self.name
         if self.attributes:
-            for name,value in self.attributes.items():
-                result += ' %s="%s"' % (name, escape(value,{'"':'&quot;'}))
+            for name, value in self.attributes.items():
+                result += ' %s="%s"' % (name, escape(value, {'"': '&quot;'}))
         if self.childNodes:
             result += '>'
             for child in self.childNodes:
@@ -196,7 +208,7 @@ def hilite(self):
         result = '&lt;<code class="markup element-name">%s</code>' % self.name
         if self.attributes:
             for name, value in self.attributes.items():
-                result += ' <code class="markup attribute-name">%s</code>=<code class="markup attribute-value">"%s"</code>' % (name, escape(value, {'"':'&quot;'}))
+                result += ' <code class="markup attribute-name">%s</code>=<code class="markup attribute-value">"%s"</code>' % (name, escape(value, {'"': '&quot;'}))
         if self.childNodes:
             result += ">"
             for child in self.childNodes:
@@ -206,12 +218,12 @@ def hilite(self):
         return result + '&lt;/<code class="markup element-name">%s</code>>' % self.name
 
     def printTree(self, indent):
-        tree = '\n|%s%s' % (' '*indent, text_type(self))
+        tree = '\n|%s%s' % (' ' * indent, text_type(self))
         indent += 2
         if self.attributes:
             for name, value in sorted(self.attributes.items()):
                 if isinstance(name, tuple):
-                    name = "%s %s"%(name[0], name[1])
+                    name = "%s %s" % (name[0], name[1])
                 tree += '\n|%s%s="%s"' % (' ' * indent, name, value)
         for child in self.childNodes:
             tree += child.printTree(indent)
@@ -223,8 +235,10 @@ def cloneNode(self):
             newNode.attributes[attr] = value
         return newNode
 
+
 class CommentNode(Node):
     type = 6
+
     def __init__(self, data):
         Node.__init__(self, None)
         self.data = data
@@ -241,6 +255,7 @@ def hilite(self):
     def cloneNode(self):
         return CommentNode(self.data)
 
+
 class TreeBuilder(_base.TreeBuilder):
     documentClass = Document
     doctypeClass = DocumentType
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
index 3d7de83f..bec625ce 100644
--- a/html5lib/treewalkers/__init__.py
+++ b/html5lib/treewalkers/__init__.py
@@ -14,6 +14,7 @@
 
 treeWalkerCache = {}
 
+
 def getTreeWalker(treeType, implementation=None, **kwargs):
     """Get a TreeWalker class for various types of tree with built-in support
 
diff --git a/html5lib/treewalkers/_base.py b/html5lib/treewalkers/_base.py
index 69da1af6..5b9c1e26 100644
--- a/html5lib/treewalkers/_base.py
+++ b/html5lib/treewalkers/_base.py
@@ -7,6 +7,7 @@
 from html5lib.constants import voidElements, spaceCharacters
 spaceCharacters = "".join(spaceCharacters)
 
+
 class TreeWalker(object):
     def __init__(self, tree):
         self.tree = tree
@@ -21,12 +22,12 @@ def emptyTag(self, namespace, name, attrs, hasChildren=False):
         assert namespace is None or isinstance(namespace, text_type), type(namespace)
         assert isinstance(name, text_type), type(name)
         assert all((namespace is None or isinstance(namespace, text_type)) and
-                                isinstance(name, text_type) and
-                                isinstance(value, text_type)
-                                for (namespace, name), value in attrs.items())
+                   isinstance(name, text_type) and
+                   isinstance(value, text_type)
+                   for (namespace, name), value in attrs.items())
 
         yield {"type": "EmptyTag", "name": name,
-               "namespace":namespace,
+               "namespace": namespace,
                "data": attrs}
         if hasChildren:
             yield self.error(_("Void element has children"))
@@ -35,13 +36,13 @@ def startTag(self, namespace, name, attrs):
         assert namespace is None or isinstance(namespace, text_type), type(namespace)
         assert isinstance(name, text_type), type(name)
         assert all((namespace is None or isinstance(namespace, text_type)) and
-                                isinstance(name, text_type) and
-                                isinstance(value, text_type)
-                                for (namespace, name), value in attrs.items())
+                   isinstance(name, text_type) and
+                   isinstance(value, text_type)
+                   for (namespace, name), value in attrs.items())
 
         return {"type": "StartTag",
                 "name": name,
-                "namespace":namespace,
+                "namespace": namespace,
                 "data": attrs}
 
     def endTag(self, namespace, name):
@@ -50,7 +51,7 @@ def endTag(self, namespace, name):
 
         return {"type": "EndTag",
                 "name": name,
-                "namespace":namespace,
+                "namespace": namespace,
                 "data": {}}
 
     def text(self, data):
@@ -58,7 +59,7 @@ def text(self, data):
 
         data = data
         middle = data.lstrip(spaceCharacters)
-        left = data[:len(data)-len(middle)]
+        left = data[:len(data) - len(middle)]
         if left:
             yield {"type": "SpaceCharacters", "data": left}
         data = middle
@@ -93,6 +94,7 @@ def entity(self, name):
     def unknown(self, nodeType):
         return self.error(_("Unknown node type: ") + nodeType)
 
+
 class RecursiveTreeWalker(TreeWalker):
     def walkChildren(self, node):
         raise NotImplementedError
@@ -118,6 +120,7 @@ def element(self, node, namespace, name, attrs, hasChildren):
 ENTITY = Node.ENTITY_NODE
 UNKNOWN = "<#UNKNOWN#>"
 
+
 class NonRecursiveTreeWalker(TreeWalker):
     def getNodeDetails(self, node):
         raise NotImplementedError
diff --git a/html5lib/treewalkers/dom.py b/html5lib/treewalkers/dom.py
index 2739e7a4..a01287a9 100644
--- a/html5lib/treewalkers/dom.py
+++ b/html5lib/treewalkers/dom.py
@@ -7,6 +7,7 @@
 
 from . import _base
 
+
 class TreeWalker(_base.NonRecursiveTreeWalker):
     def getNodeDetails(self, node):
         if node.nodeType == Node.DOCUMENT_TYPE_NODE:
@@ -20,9 +21,9 @@ def getNodeDetails(self, node):
             for attr in list(node.attributes.keys()):
                 attr = node.getAttributeNode(attr)
                 if attr.namespaceURI:
-                    attrs[(attr.namespaceURI,attr.localName)] = attr.value
+                    attrs[(attr.namespaceURI, attr.localName)] = attr.value
                 else:
-                    attrs[(None,attr.name)] = attr.value
+                    attrs[(None, attr.name)] = attr.value
             return (_base.ELEMENT, node.namespaceURI, node.nodeName,
                     attrs, node.hasChildNodes())
 
diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py
index 57de4aa9..93a2cbe0 100644
--- a/html5lib/treewalkers/etree.py
+++ b/html5lib/treewalkers/etree.py
@@ -12,6 +12,7 @@
 
 tag_regexp = re.compile("{([^}]*)}(.*)")
 
+
 def getETreeBuilder(ElementTreeImplementation):
     ElementTree = ElementTreeImplementation
 
@@ -30,7 +31,7 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
            text node; either the text or tail of the current element (1)
         """
         def getNodeDetails(self, node):
-            if isinstance(node, tuple): # It might be the root Element
+            if isinstance(node, tuple):  # It might be the root Element
                 elt, key, parents, flag = node
                 if flag in ("text", "tail"):
                     return _base.TEXT, getattr(elt, flag)
@@ -52,7 +53,7 @@ def getNodeDetails(self, node):
 
             else:
                 assert type(node.tag) == text_type, type(node.tag)
-                #This is assumed to be an ordinary element
+                # This is assumed to be an ordinary element
                 match = tag_regexp.match(node.tag)
                 if match:
                     namespace, tag = match.groups()
@@ -63,9 +64,9 @@ def getNodeDetails(self, node):
                 for name, value in list(node.attrib.items()):
                     match = tag_regexp.match(name)
                     if match:
-                        attrs[(match.group(1),match.group(2))] = value
+                        attrs[(match.group(1), match.group(2))] = value
                     else:
-                        attrs[(None,name)] = value
+                        attrs[(None, name)] = value
                 return (_base.ELEMENT, namespace, tag,
                         attrs, len(node) or node.text)
 
@@ -102,7 +103,7 @@ def getNextSibling(self, node):
                 if element.tail and flag != "tail":
                     return element, key, parents, "tail"
                 elif key < len(parents[-1]) - 1:
-                    return parents[-1][key+1], key+1, parents, None
+                    return parents[-1][key + 1], key + 1, parents, None
                 else:
                     return None
 
diff --git a/html5lib/treewalkers/genshistream.py b/html5lib/treewalkers/genshistream.py
index 365d6aec..e57ce635 100644
--- a/html5lib/treewalkers/genshistream.py
+++ b/html5lib/treewalkers/genshistream.py
@@ -2,12 +2,13 @@
 
 from genshi.core import QName
 from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
-from genshi.core  import  START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
+from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
 
 from . import _base
 
 from html5lib.constants import voidElements, namespaces
 
+
 class TreeWalker(_base.TreeWalker):
     def __iter__(self):
         # Buffer the events so we can pass in the following one
@@ -60,8 +61,8 @@ def tokens(self, event, next):
         elif kind == DOCTYPE:
             yield self.doctype(*data)
 
-        elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, \
-          START_CDATA, END_CDATA, PI):
+        elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS,
+                      START_CDATA, END_CDATA, PI):
             pass
 
         else:
diff --git a/html5lib/treewalkers/lxmletree.py b/html5lib/treewalkers/lxmletree.py
index 186f9082..75c65afe 100644
--- a/html5lib/treewalkers/lxmletree.py
+++ b/html5lib/treewalkers/lxmletree.py
@@ -11,6 +11,7 @@
 
 from html5lib import ihatexml
 
+
 def ensure_str(s):
     if s is None:
         return None
@@ -19,6 +20,7 @@ def ensure_str(s):
     else:
         return s.decode("utf-8", "strict")
 
+
 class Root(object):
     def __init__(self, et):
         self.elementtree = et
@@ -49,6 +51,7 @@ def getnext(self):
     def __len__(self):
         return 1
 
+
 class Doctype(object):
     def __init__(self, root_node, name, public_id, system_id):
         self.root_node = root_node
@@ -62,6 +65,7 @@ def __init__(self, root_node, name, public_id, system_id):
     def getnext(self):
         return self.root_node.children[1]
 
+
 class FragmentRoot(Root):
     def __init__(self, children):
         self.children = [FragmentWrapper(self, child) for child in children]
@@ -70,6 +74,7 @@ def __init__(self, children):
     def getnext(self):
         return None
 
+
 class FragmentWrapper(object):
     def __init__(self, fragment_root, obj):
         self.root_node = fragment_root
@@ -127,7 +132,7 @@ def __init__(self, tree):
         self.filter = ihatexml.InfosetFilter()
 
     def getNodeDetails(self, node):
-        if isinstance(node, tuple): # Text node
+        if isinstance(node, tuple):  # Text node
             node, key = node
             assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
             return _base.TEXT, ensure_str(getattr(node, key))
@@ -145,10 +150,10 @@ def getNodeDetails(self, node):
             return _base.COMMENT, ensure_str(node.text)
 
         elif node.tag == etree.Entity:
-            return _base.ENTITY, ensure_str(node.text)[1:-1] # strip &;
+            return _base.ENTITY, ensure_str(node.text)[1:-1]  # strip &;
 
         else:
-            #This is assumed to be an ordinary element
+            # This is assumed to be an ordinary element
             match = tag_regexp.match(ensure_str(node.tag))
             if match:
                 namespace, tag = match.groups()
@@ -161,9 +166,9 @@ def getNodeDetails(self, node):
                 value = ensure_str(value)
                 match = tag_regexp.match(name)
                 if match:
-                    attrs[(match.group(1),match.group(2))] = value
+                    attrs[(match.group(1), match.group(2))] = value
                 else:
-                    attrs[(None,name)] = value
+                    attrs[(None, name)] = value
             return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag),
                     attrs, len(node) > 0 or node.text)
 
@@ -177,7 +182,7 @@ def getFirstChild(self, node):
             return node[0]
 
     def getNextSibling(self, node):
-        if isinstance(node, tuple): # Text node
+        if isinstance(node, tuple):  # Text node
             node, key = node
             assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
             if key == "text":
@@ -187,13 +192,13 @@ def getNextSibling(self, node):
                     return node[0]
                 else:
                     return None
-            else: # tail
+            else:  # tail
                 return node.getnext()
 
         return (node, "tail") if node.tail else node.getnext()
 
     def getParentNode(self, node):
-        if isinstance(node, tuple): # Text node
+        if isinstance(node, tuple):  # Text node
             node, key = node
             assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
             if key == "text":
diff --git a/html5lib/treewalkers/pulldom.py b/html5lib/treewalkers/pulldom.py
index 12501093..0ecd9790 100644
--- a/html5lib/treewalkers/pulldom.py
+++ b/html5lib/treewalkers/pulldom.py
@@ -7,13 +7,14 @@
 
 from html5lib.constants import voidElements
 
+
 class TreeWalker(_base.TreeWalker):
     def __iter__(self):
         ignore_until = None
         previous = None
         for event in self.tree:
             if previous is not None and \
-              (ignore_until is None or previous[1] is ignore_until):
+                    (ignore_until is None or previous[1] is ignore_until):
                 if previous[1] is ignore_until:
                     ignore_until = None
                 for token in self.tokens(previous, event):
@@ -35,7 +36,7 @@ def tokens(self, event, next):
             attrs = {}
             for attr in list(node.attributes.keys()):
                 attr = node.getAttributeNode(attr)
-                attrs[(attr.namespaceURI,attr.localName)] = attr.value
+                attrs[(attr.namespaceURI, attr.localName)] = attr.value
             if name in voidElements:
                 for token in self.emptyTag(namespace,
                                            name,
diff --git a/html5lib/treewalkers/simpletree.py b/html5lib/treewalkers/simpletree.py
index a2abec85..58c4c0a9 100644
--- a/html5lib/treewalkers/simpletree.py
+++ b/html5lib/treewalkers/simpletree.py
@@ -5,6 +5,7 @@
 
 from . import _base
 
+
 class TreeWalker(_base.NonRecursiveTreeWalker):
     """Given that simpletree has no performant way of getting a node's
     next sibling, this implementation returns "nodes" as tuples with the
@@ -19,38 +20,38 @@ class TreeWalker(_base.NonRecursiveTreeWalker):
     """
 
     def getNodeDetails(self, node):
-        if isinstance(node, tuple): # It might be the root Node
+        if isinstance(node, tuple):  # It might be the root Node
             parent, idx, parents = node
             node = parent.childNodes[idx]
 
         # testing node.type allows us not to import treebuilders.simpletree
-        if node.type in (1, 2): # Document or DocumentFragment
+        if node.type in (1, 2):  # Document or DocumentFragment
             return (_base.DOCUMENT,)
 
-        elif node.type == 3: # DocumentType
+        elif node.type == 3:  # DocumentType
             return _base.DOCTYPE, node.name, node.publicId, node.systemId
 
-        elif node.type == 4: # TextNode
+        elif node.type == 4:  # TextNode
             return _base.TEXT, node.value
 
-        elif node.type == 5: # Element
+        elif node.type == 5:  # Element
             attrs = {}
             for name, value in list(node.attributes.items()):
                 if isinstance(name, tuple):
-                    attrs[(name[2],name[1])] = value
+                    attrs[(name[2], name[1])] = value
                 else:
-                    attrs[(None,name)] = value
+                    attrs[(None, name)] = value
             return (_base.ELEMENT, node.namespace, node.name,
                     attrs, node.hasContent())
 
-        elif node.type == 6: # CommentNode
+        elif node.type == 6:  # CommentNode
             return _base.COMMENT, node.data
 
         else:
             return _base.UNKNOWN, node.type
 
     def getFirstChild(self, node):
-        if isinstance(node, tuple): # It might be the root Node
+        if isinstance(node, tuple):  # It might be the root Node
             parent, idx, parents = node
             parents.append((parent, idx))
             node = parent.childNodes[idx]
diff --git a/html5lib/trie/_base.py b/html5lib/trie/_base.py
index c4a4354d..724486b1 100644
--- a/html5lib/trie/_base.py
+++ b/html5lib/trie/_base.py
@@ -2,6 +2,7 @@
 
 from collections import Mapping
 
+
 class Trie(Mapping):
     """Abstract base class for tries"""
 
diff --git a/html5lib/trie/datrie.py b/html5lib/trie/datrie.py
index 762b471f..51f3d046 100644
--- a/html5lib/trie/datrie.py
+++ b/html5lib/trie/datrie.py
@@ -5,6 +5,7 @@
 
 from ._base import Trie as ABCTrie
 
+
 class Trie(ABCTrie):
     def __init__(self, data):
         chars = set()
diff --git a/html5lib/trie/py.py b/html5lib/trie/py.py
index ec817d78..c2ba3da7 100644
--- a/html5lib/trie/py.py
+++ b/html5lib/trie/py.py
@@ -5,6 +5,7 @@
 
 from ._base import Trie as ABCTrie
 
+
 class Trie(ABCTrie):
     def __init__(self, data):
         if not all(isinstance(x, text_type) for x in data.keys()):
diff --git a/html5lib/utils.py b/html5lib/utils.py
index 4363182b..9841aebf 100644
--- a/html5lib/utils.py
+++ b/html5lib/utils.py
@@ -2,6 +2,7 @@
 
 from types import ModuleType
 
+
 class MethodDispatcher(dict):
     """Dict with 2 special properties:
 
@@ -20,7 +21,7 @@ def __init__(self, items=()):
         # twice as fast. Please do careful performance testing before changing
         # anything here.
         _dictEntries = []
-        for name,value in items:
+        for name, value in items:
             if type(name) in (list, tuple, frozenset, set):
                 for item in name:
                     _dictEntries.append((item, value))
@@ -33,14 +34,15 @@ def __getitem__(self, key):
         return dict.get(self, key, self.default)
 
 
-#Some utility functions to dal with weirdness around UCS2 vs UCS4
-#python builds
+# Some utility functions to deal with weirdness around UCS2 vs UCS4
+# python builds
 
 def isSurrogatePair(data):
     return (len(data) == 2 and
             ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and
             ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF)
 
+
 def surrogatePairToCodepoint(data):
     char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 +
                 (ord(data[1]) - 0xDC00))
@@ -49,10 +51,12 @@ def surrogatePairToCodepoint(data):
 # Module Factory Factory (no, this isn't Java, I know)
 # Here to stop this being duplicated all over the place.
 
+
 def moduleFactoryFactory(factory):
     moduleCache = {}
+
     def moduleFactory(baseModule, *args, **kwargs):
-        if type(ModuleType.__name__) is type(""):
+        if isinstance(ModuleType.__name__, type("")):
             name = "_%s_factory" % baseModule.__name__
         else:
             name = b"_%s_factory" % baseModule.__name__

From b8f8e3d76f117a9afb57e4641771f9dca7b8cd58 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sat, 13 Apr 2013 22:17:46 +0100
Subject: [PATCH 04/12] Fix ElementTree treewalker under cElementTree on Python
 2.6.

This is effectively the treewalker equivalent of 3e50aad85d8113e7:

    Don't rely on the ElementTree Comment factory being the tag
    attribute on Comments.

    This is needed for xml.etree.cElementTree under 2.6 (and likely
    hence custom installs of cElementTree 1.2), where the tag property
    is equal to xml.etree.ElementTree.Comment (i.e., the pure Python
    version).
---
 html5lib/treewalkers/etree.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py
index 93a2cbe0..6cf3bca7 100644
--- a/html5lib/treewalkers/etree.py
+++ b/html5lib/treewalkers/etree.py
@@ -15,6 +15,7 @@
 
 def getETreeBuilder(ElementTreeImplementation):
     ElementTree = ElementTreeImplementation
+    ElementTreeCommentType = ElementTree.Comment("asd").tag
 
     class TreeWalker(_base.NonRecursiveTreeWalker):
         """Given the particular ElementTree representation, this implementation,
@@ -48,7 +49,7 @@ def getNodeDetails(self, node):
                 return (_base.DOCTYPE, node.text,
                         node.get("publicId"), node.get("systemId"))
 
-            elif node.tag == ElementTree.Comment:
+            elif node.tag == ElementTreeCommentType:
                 return _base.COMMENT, node.text
 
             else:

From fa3cad5d0829f761fa70493299c1e051da4cbd1a Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sat, 13 Apr 2013 22:21:15 +0100
Subject: [PATCH 05/12] fixup! Placate pyflakes.

---
 .travis.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 262df222..0fc0d72b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,10 +12,10 @@ env:
 
 matrix:
   exclude:
-    - python: 3.3
+    - python: "3.3"
       env: USE_OPTIONAL=false
   include:
-    - python: 3.3
+    - python: "3.3"
       env: USE_OPTIONAL=false FLAKE=true
 
 before_install:

From 5b3cb3589be952b1919e2c59ee860ffa60e97e5b Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sat, 13 Apr 2013 22:29:44 +0100
Subject: [PATCH 06/12] Fix more flake8 issues.

It turns out flake8's behaviour differs between Python 2 and Python 3
because of the changes between the two, so run it on both on Travis.
---
 .travis.yml                         | 4 ++++
 html5lib/tests/tokenizertotree.py   | 2 +-
 html5lib/tokenizer.py               | 2 +-
 html5lib/treebuilders/etree.py      | 2 +-
 html5lib/treebuilders/etree_lxml.py | 6 +++---
 5 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 0fc0d72b..66d92deb 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,9 +12,13 @@ env:
 
 matrix:
   exclude:
+    - python: "2.7"
+      env: USE_OPTIONAL=false
     - python: "3.3"
       env: USE_OPTIONAL=false
   include:
+    - python: "2.7"
+      env: USE_OPTIONAL=false FLAKE=true
     - python: "3.3"
       env: USE_OPTIONAL=false FLAKE=true
 
diff --git a/html5lib/tests/tokenizertotree.py b/html5lib/tests/tokenizertotree.py
index 27239997..b841c76c 100644
--- a/html5lib/tests/tokenizertotree.py
+++ b/html5lib/tests/tokenizertotree.py
@@ -25,7 +25,7 @@ def main(out_path):
 
 def run_file(filename, out_path):
     try:
-        tests_data = json.load(file(filename))
+        tests_data = json.load(open(filename, "r"))
     except ValueError:
         sys.stderr.write("Failed to load %s\n" % filename)
         return
diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py
index fca976c7..c49eee0d 100644
--- a/html5lib/tokenizer.py
+++ b/html5lib/tokenizer.py
@@ -1,7 +1,7 @@
 from __future__ import absolute_import, division, unicode_literals
 
 try:
-    chr = unichr
+    chr = unichr # flake8: noqa
 except NameError:
     pass
 
diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
index ed2311c4..c5b27fae 100755
--- a/html5lib/treebuilders/etree.py
+++ b/html5lib/treebuilders/etree.py
@@ -255,7 +255,7 @@ def tostring(element):
         filter = ihatexml.InfosetFilter()
 
         def serializeElement(element):
-            if type(element) == type(ElementTree.ElementTree):
+            if isinstance(element, ElementTree.ElementTree):
                 element = element.getroot()
 
             if element.tag == "<!DOCTYPE>":
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 4ca894b7..867700ea 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -86,7 +86,7 @@ def serializeElement(element, indent=0):
                 rv.append("#document-fragment")
                 for next_element in element:
                     serializeElement(next_element, indent + 2)
-        elif type(element.tag) == type(etree.Comment):
+        elif isinstance(element.tag, etree.Comment):
             rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
             if hasattr(element, "tail") and element.tail:
                 rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
@@ -149,7 +149,7 @@ def serializeElement(element):
                 rv.append(dtd_str)
             serializeElement(element.getroot())
 
-        elif type(element.tag) == type(etree.Comment):
+        elif isinstance(element.tag, etree.Comment):
             rv.append("<!--%s-->" % (element.text,))
 
         else:
@@ -301,7 +301,7 @@ def insertCommentInitial(self, data, parent=None):
 
     def insertCommentMain(self, data, parent=None):
         if (parent == self.document and
-                type(self.document._elementTree.getroot()[-1].tag) == type(etree.Comment)):
+                isinstance(self.document._elementTree.getroot()[-1].tag, etree.Comment)):
                 warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
         super(TreeBuilder, self).insertComment(data, parent)
 

From c10df713658a59afde2875da0a55906a06d40e53 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sat, 13 Apr 2013 22:43:05 +0100
Subject: [PATCH 07/12] fixup! Fix more flake8 issues.

---
 html5lib/treebuilders/etree_lxml.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 867700ea..6879797e 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -29,6 +29,8 @@
 fullTree = True
 tag_regexp = re.compile("{([^}]*)}(.*)")
 
+comment_type = etree.Comment("asd").tag
+
 
 class DocumentType(object):
     def __init__(self, name, publicId, systemId):
@@ -86,7 +88,7 @@ def serializeElement(element, indent=0):
                 rv.append("#document-fragment")
                 for next_element in element:
                     serializeElement(next_element, indent + 2)
-        elif isinstance(element.tag, etree.Comment):
+        elif element.tag == comment_type:
             rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
             if hasattr(element, "tail") and element.tail:
                 rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
@@ -149,7 +151,7 @@ def serializeElement(element):
                 rv.append(dtd_str)
             serializeElement(element.getroot())
 
-        elif isinstance(element.tag, etree.Comment):
+        elif element.tag == comment_type:
             rv.append("<!--%s-->" % (element.text,))
 
         else:
@@ -301,7 +303,7 @@ def insertCommentInitial(self, data, parent=None):
 
     def insertCommentMain(self, data, parent=None):
         if (parent == self.document and
-                isinstance(self.document._elementTree.getroot()[-1].tag, etree.Comment)):
+                self.document._elementTree.getroot()[-1].tag == comment_type):
                 warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
         super(TreeBuilder, self).insertComment(data, parent)
 

From 5c59f4c78f51772a7cc7b60985435f5a41bebcbf Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Tue, 23 Apr 2013 16:47:00 +0100
Subject: [PATCH 08/12] Remove commented-out code

---
 html5lib/tests/test_treewalkers.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index 039bf3d9..2ac59115 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -80,9 +80,6 @@ def PullDOMAdapter(node):
 except ImportError:
     pass
 else:
-#    treeTypes['lxml_as_etree'] = \
-#        {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
-#         "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
     treeTypes['lxml_native'] = \
         {"builder": treebuilders.getTreeBuilder("lxml"),
          "walker": treewalkers.getTreeWalker("lxml")}

From 8ad7e3b6c629bb4ada9968f3bcb8475dd6edc3ae Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Tue, 23 Apr 2013 16:48:19 +0100
Subject: [PATCH 09/12] Move Genshi adapter/test dict out of try block.

---
 html5lib/tests/test_treewalkers.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index 2ac59115..7f7853ed 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -98,7 +98,9 @@ def PullDOMAdapter(node):
 try:
     from genshi.core import QName, Attrs
     from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
-
+except ImportError:
+    pass
+else:
     def GenshiAdapter(tree):
         text = None
         for token in treewalkers.getTreeWalker("simpletree")(tree):
@@ -148,8 +150,6 @@ def GenshiAdapter(tree):
         {"builder": treebuilders.getTreeBuilder("simpletree"),
          "adapter": GenshiAdapter,
          "walker": treewalkers.getTreeWalker("genshi")}
-except ImportError:
-    pass
 
 
 def concatenateCharacterTokens(tokens):

From e3786381a3c553bd5cbcfb35b187694c7f513ad2 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sat, 27 Apr 2013 13:23:58 +0100
Subject: [PATCH 10/12] fixup! Placate pyflakes.

---
 html5lib/treebuilders/etree.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
index c5b27fae..ec0115ba 100755
--- a/html5lib/treebuilders/etree.py
+++ b/html5lib/treebuilders/etree.py
@@ -207,8 +207,12 @@ def serializeElement(element, indent=0):
                     rv.append("<!DOCTYPE %s>" % (element.text,))
             elif element.tag == "DOCUMENT_ROOT":
                 rv.append("#document")
-                assert element.text is None
-                assert element.tail is None
+                if element.text is not None:
+                    raise TypeError("Document node cannot have text")
+                if element.tail is not None:
+                    raise TypeError("Document node cannot have tail")
+                if hasattr(element, "attrib") and len(element.attrib):
+                    raise TypeError("Document node cannot have attributes")
             elif element.tag == ElementTreeCommentType:
                 rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
             else:

From 19e5fa86116b8cc69e14bd3849a44d27f8447684 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sat, 27 Apr 2013 13:28:42 +0100
Subject: [PATCH 11/12] fixup! Placate pyflakes.

---
 html5lib/treebuilders/etree.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
index ec0115ba..0abbab51 100755
--- a/html5lib/treebuilders/etree.py
+++ b/html5lib/treebuilders/etree.py
@@ -208,7 +208,7 @@ def serializeElement(element, indent=0):
             elif element.tag == "DOCUMENT_ROOT":
                 rv.append("#document")
                 if element.text is not None:
-                    raise TypeError("Document node cannot have text")
+                    rv.append("|%s\"%s\""%(' '*(indent+2), element.text))
                 if element.tail is not None:
                     raise TypeError("Document node cannot have tail")
                 if hasattr(element, "attrib") and len(element.attrib):
@@ -271,8 +271,12 @@ def serializeElement(element):
                 else:
                     rv.append("<!DOCTYPE %s>" % (element.text,))
             elif element.tag == "DOCUMENT_ROOT":
-                assert element.text is None
-                assert element.tail is None
+                if element.text is not None:
+                    rv.append(element.text)
+                if element.tail is not None:
+                    raise TypeError("Document node cannot have tail")
+                if hasattr(element, "attrib") and len(element.attrib):
+                    raise TypeError("Document node cannot have attributes")
 
                 for child in element:
                     serializeElement(child)

From 2535c50f91260c506df91747df621870db5294cb Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Sat, 27 Apr 2013 13:40:13 +0100
Subject: [PATCH 12/12] fixup! fixup! Placate pyflakes.

---
 html5lib/treebuilders/etree.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
index 0abbab51..018b6606 100755
--- a/html5lib/treebuilders/etree.py
+++ b/html5lib/treebuilders/etree.py
@@ -208,7 +208,7 @@ def serializeElement(element, indent=0):
             elif element.tag == "DOCUMENT_ROOT":
                 rv.append("#document")
                 if element.text is not None:
-                    rv.append("|%s\"%s\""%(' '*(indent+2), element.text))
+                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
                 if element.tail is not None:
                     raise TypeError("Document node cannot have tail")
                 if hasattr(element, "attrib") and len(element.attrib):