Expose the name of the encoding used to decode the input stream as a
read-only `documentEncoding` property on the parser. The property returns
None while the parser has no `tokenizer` attribute (the updated test
asserts this on a freshly constructed HTMLParser, before parse() is called).
The parser encoding test now reads the property instead of reaching into
p.tokenizer.stream.charEncoding[0] directly.

diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index b0f14f39..9be3e134 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -129,6 +129,17 @@ def reset(self):
 
         self.framesetOK = True
 
+    @property
+    def documentEncoding(self):
+        """The name of the character encoding
+        that was used to decode the input stream,
+        or :obj:`None` if that is not determined yet.
+
+        """
+        if not hasattr(self, 'tokenizer'):
+            return None
+        return self.tokenizer.stream.charEncoding[0]
+
     def isHTMLIntegrationPoint(self, element):
         if (element.name == "annotation-xml" and
                 element.namespace == namespaces["mathml"]):
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
index f314421d..d774ce0f 100644
--- a/html5lib/tests/test_encoding.py
+++ b/html5lib/tests/test_encoding.py
@@ -28,10 +28,11 @@ def test_codec_name_d(self):
 
 def runParserEncodingTest(data, encoding):
     p = HTMLParser()
+    assert p.documentEncoding is None
     p.parse(data, useChardet=False)
     encoding = encoding.lower().decode("ascii")
 
-    assert encoding == p.tokenizer.stream.charEncoding[0], errorMessage(data, encoding, p.tokenizer.stream.charEncoding[0])
+    assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)
 
 
 def runPreScanEncodingTest(data, encoding):