Skip to content

Commit 40ff8fb

Browse files
committed
Fix input stream tests.
1 parent 8d3bd1a commit 40ff8fb

File tree

2 files changed

+13
-10
lines changed

2 files changed

+13
-10
lines changed

html5lib/inputstream.py

+8-5
Original file line number | Diff line number | Diff line change
@@ -504,8 +504,8 @@ def detectBOM(self):
504504
encoding otherwise return None"""
505505
bomDict = {
506506
codecs.BOM_UTF8: 'utf-8',
507-
codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
508-
codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
507+
codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
508+
codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
509509
}
510510

511511
# Go to beginning of file and read in 4 bytes
@@ -525,9 +525,12 @@ def detectBOM(self):
525525

526526
# Set the read position past the BOM if one was found, otherwise
527527
# set it to the start of the stream
528-
self.rawStream.seek(encoding and seek or 0)
529-
530-
return lookupEncoding(encoding)
528+
if encoding:
529+
self.rawStream.seek(seek)
530+
return lookupEncoding(encoding)
531+
else:
532+
self.rawStream.seek(0)
533+
return None
531534

532535
def detectEncodingMeta(self):
533536
"""Report the encoding declared by the meta element

html5lib/tests/test_stream.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -84,29 +84,29 @@ class HTMLInputStreamTest(unittest.TestCase):
8484

8585
def test_char_ascii(self):
8686
stream = HTMLInputStream(b"'", encoding='ascii')
87-
self.assertEqual(stream.charEncoding[0], 'ascii')
87+
self.assertEqual(stream.charEncoding[0].name, 'windows-1252')
8888
self.assertEqual(stream.char(), "'")
8989

9090
def test_char_utf8(self):
9191
stream = HTMLInputStream('\u2018'.encode('utf-8'), encoding='utf-8')
92-
self.assertEqual(stream.charEncoding[0], 'utf-8')
92+
self.assertEqual(stream.charEncoding[0].name, 'utf-8')
9393
self.assertEqual(stream.char(), '\u2018')
9494

9595
def test_char_win1252(self):
9696
stream = HTMLInputStream("\xa9\xf1\u2019".encode('windows-1252'))
97-
self.assertEqual(stream.charEncoding[0], 'windows-1252')
97+
self.assertEqual(stream.charEncoding[0].name, 'windows-1252')
9898
self.assertEqual(stream.char(), "\xa9")
9999
self.assertEqual(stream.char(), "\xf1")
100100
self.assertEqual(stream.char(), "\u2019")
101101

102102
def test_bom(self):
103103
stream = HTMLInputStream(codecs.BOM_UTF8 + b"'")
104-
self.assertEqual(stream.charEncoding[0], 'utf-8')
104+
self.assertEqual(stream.charEncoding[0].name, 'utf-8')
105105
self.assertEqual(stream.char(), "'")
106106

107107
def test_utf_16(self):
108108
stream = HTMLInputStream((' ' * 1025).encode('utf-16'))
109-
self.assertTrue(stream.charEncoding[0] in ['utf-16-le', 'utf-16-be'], stream.charEncoding)
109+
self.assertTrue(stream.charEncoding[0].name in ['utf-16le', 'utf-16be'], stream.charEncoding)
110110
self.assertEqual(len(stream.charsUntil(' ', True)), 1025)
111111

112112
def test_newlines(self):

0 commit comments

Comments
 (0)