@@ -27,7 +27,7 @@ class BufferedIOBase(object):
27
27
# Immutable set holding every entry of asciiUppercase encoded to ASCII bytes.
asciiUppercaseBytes = frozenset ([item .encode ("ascii" ) for item in asciiUppercase ])
28
28
# spaceCharactersBytes combined with the byte strings b">" and b"<".
spacesAngleBrackets = spaceCharactersBytes | frozenset ([b">" , b"<" ])
29
29
30
- invalid_unicode_re = re .compile ("[\u0001 -\u0008 \u000B \u000E -\u001F \u007F -\u009F \uD800 -\uDFFF \uFDD0 -\uFDEF \uFFFE \uFFFF \U0001FFFE \U0001FFFF \U0002FFFE \U0002FFFF \U0003FFFE \U0003FFFF \U0004FFFE \U0004FFFF \U0005FFFE \U0005FFFF \U0006FFFE \U0006FFFF \U0007FFFE \U0007FFFF \U0008FFFE \U0008FFFF \U0009FFFE \U0009FFFF \U000AFFFE \U000AFFFF \U000BFFFE \U000BFFFF \U000CFFFE \U000CFFFF \U000DFFFE \U000DFFFF \U000EFFFE \U000EFFFF \U000FFFFE \U000FFFFF \U0010FFFE \U0010FFFF ]" )
30
+ invalid_unicode_re = re .compile ("[\u0001 -\u0008 \u000B \u000E -\u001F \u007F -\u009F \uD800 -\uDFFF \uFDD0 -\uFDEF \uFFFE \uFFFF \U0001FFFE \U0001FFFF \U0002FFFE \U0002FFFF \U0003FFFE \U0003FFFF \U0004FFFE \U0004FFFF \U0005FFFE \U0005FFFF \U0006FFFE \U0006FFFF \U0007FFFE \U0007FFFF \U0008FFFE \U0008FFFF \U0009FFFE \U0009FFFF \U000AFFFE \U000AFFFF \U000BFFFE \U000BFFFF \U000CFFFE \U000CFFFF \U000DFFFE \U000DFFFF \U000EFFFE \U000EFFFF \U000FFFFE \U000FFFFF \U0010FFFE \U0010FFFF ]" ) # noqa
31
31
32
32
non_bmp_invalid_codepoints = set ([0x1FFFE , 0x1FFFF , 0x2FFFE , 0x2FFFF , 0x3FFFE ,
33
33
0x3FFFF , 0x4FFFE , 0x4FFFF , 0x5FFFE , 0x5FFFF ,
@@ -118,6 +118,10 @@ def _readFromBuffer(self, bytes):
118
118
119
119
120
120
def HTMLInputStream (source , encoding = None , parseMeta = True , chardet = True ):
121
+ if (hasattr (source , 'unget' ) and hasattr (source , 'charsUntil' ) and
122
+ hasattr (source , 'position' ) and hasattr (source , 'char' ) and
123
+ hasattr (source , 'reset' ) and hasattr (source , 'errors' )):
124
+ return source
121
125
if hasattr (source , "read" ):
122
126
isUnicode = isinstance (source .read (0 ), text_type )
123
127
else :
0 commit comments