@@ -185,14 +185,10 @@ def __init__(self, source):
185
185
# Such platforms will have already checked for such
186
186
# surrogate errors, so no need to do this checking.
187
187
self .reportCharacterErrors = None
188
- self .replaceCharactersRegexp = None
189
188
elif len ("\U0010FFFF " ) == 1 :
190
189
self .reportCharacterErrors = self .characterErrorsUCS4
191
- self .replaceCharactersRegexp = re .compile (eval ('"[\\ uD800-\\ uDFFF]"' ))
192
190
else :
193
191
self .reportCharacterErrors = self .characterErrorsUCS2
194
- self .replaceCharactersRegexp = re .compile (
195
- eval ('"([\\ uD800-\\ uDBFF](?![\\ uDC00-\\ uDFFF])|(?<![\\ uD800-\\ uDBFF])[\\ uDC00-\\ uDFFF])"' ))
196
192
197
193
# List of where new lines occur
198
194
self .newLines = [0 ]
@@ -290,10 +286,7 @@ def readChunk(self, chunkSize=None):
290
286
if self .reportCharacterErrors :
291
287
self .reportCharacterErrors (data )
292
288
293
- # Replace invalid characters
294
- # Note U+0000 is dealt with in the tokenizer
295
- data = self .replaceCharactersRegexp .sub ("\ufffd " , data )
296
-
289
+ # Normalize line endings: convert CRLF and lone CR to LF
297
290
data = data .replace ("\r \n " , "\n " )
298
291
data = data .replace ("\r " , "\n " )
299
292
0 commit comments