From c5a800779b5af3d46aa4dbc69a171fdc47906005 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Wed, 4 May 2016 00:05:51 +0100
Subject: [PATCH 1/3] Add a better test for encoding prescan length

---
 html5lib/tests/test_encoding.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
index 837e989f..6c996b00 100644
--- a/html5lib/tests/test_encoding.py
+++ b/html5lib/tests/test_encoding.py
@@ -12,6 +12,15 @@
 from html5lib import HTMLParser, inputstream
 
 
+def test_basic_prescan_length():
+    data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
+    pad = 1024 - len(data) + 1
+    data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
+    assert len(data) == 1024  # Sanity
+    stream = inputstream.HTMLBinaryInputStream(data, chardet=False)
+    assert 'utf-8' == stream.charEncoding[0].name
+
+
 def runParserEncodingTest(data, encoding):
     p = HTMLParser()
     assert p.documentEncoding is None

From 1d9f391f6f92677c29803272d9a3e27831a84814 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Wed, 4 May 2016 00:06:24 +0100
Subject: [PATCH 2/3] Fix changing encoding to actually change encoding; add
 test for it

---
 html5lib/inputstream.py         |  6 ++++--
 html5lib/tests/test_encoding.py | 13 +++++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py
index ad5ca7dc..15acba0d 100644
--- a/html5lib/inputstream.py
+++ b/html5lib/inputstream.py
@@ -509,8 +509,10 @@ def changeEncoding(self, newEncoding):
             self.charEncoding = (self.charEncoding[0], "certain")
         else:
             self.rawStream.seek(0)
-            self.reset()
+            # Save the old encoding now; charEncoding is overwritten on the next line.
+            oldEncoding = self.charEncoding[0]
             self.charEncoding = (newEncoding, "certain")
-            raise ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
+            self.reset()
+            raise ReparseException("Encoding changed from %s to %s" % (oldEncoding, newEncoding))
 
     def detectBOM(self):
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
index 6c996b00..3837fe09 100644
--- a/html5lib/tests/test_encoding.py
+++ b/html5lib/tests/test_encoding.py
@@ -21,6 +21,19 @@ def test_basic_prescan_length():
     assert 'utf-8' == stream.charEncoding[0].name
 
 
+def test_parser_reparse():
+    data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
+    pad = 10240 - len(data) + 1
+    data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
+    assert len(data) == 10240  # Sanity
+    stream = inputstream.HTMLBinaryInputStream(data, chardet=False)
+    assert 'windows-1252' == stream.charEncoding[0].name
+    p = HTMLParser(namespaceHTMLElements=False)
+    doc = p.parse(data, useChardet=False)
+    assert 'utf-8' == p.documentEncoding
+    assert doc.find(".//title").text == "Caf\u00E9"
+
+
 def runParserEncodingTest(data, encoding):
     p = HTMLParser()
     assert p.documentEncoding is None

From b0ae0c2aaa5c2d74602a9dc576f10753f1383882 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Wed, 4 May 2016 00:06:48 +0100
Subject: [PATCH 3/3] Ensure we only ever reparse *once*

---
 html5lib/html5parser.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index a7cb98be..34f7ac5c 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -89,12 +89,11 @@ def _parse(self, stream, innerHTML=False, container="div",
                                               parser=self, **kwargs)
         self.reset()
 
-        while True:
-            try:
-                self.mainLoop()
-                break
-            except ReparseException:
-                self.reset()
+        try:
+            self.mainLoop()
+        except ReparseException:
+            self.reset()
+            self.mainLoop()
 
     def reset(self):
         self.tree.reset()