diff --git a/bson/codec_options.py b/bson/codec_options.py index 81e79158b4..27df48de8a 100644 --- a/bson/codec_options.py +++ b/bson/codec_options.py @@ -233,7 +233,8 @@ class CodecOptions(_options_base): - `unicode_decode_error_handler`: The error handler to apply when a Unicode-related error occurs during BSON decoding that would otherwise raise :exc:`UnicodeDecodeError`. Valid options include - 'strict', 'replace', and 'ignore'. Defaults to 'strict'. + 'strict', 'replace', 'backslashreplace', 'surrogateescape', and + 'ignore'. Defaults to 'strict'. - `tzinfo`: A :class:`~datetime.tzinfo` subclass that specifies the timezone to/from which :class:`~datetime.datetime` objects should be encoded/decoded. diff --git a/pymongo/mongo_client.py b/pymongo/mongo_client.py index a133c96a7f..052ade3853 100644 --- a/pymongo/mongo_client.py +++ b/pymongo/mongo_client.py @@ -330,7 +330,8 @@ def __init__( - `unicode_decode_error_handler`: The error handler to apply when a Unicode-related error occurs during BSON decoding that would otherwise raise :exc:`UnicodeDecodeError`. Valid options include - 'strict', 'replace', and 'ignore'. Defaults to 'strict'. + 'strict', 'replace', 'backslashreplace', 'surrogateescape', and + 'ignore'. Defaults to 'strict'. - `srvServiceName`: (string) The SRV service name to use for "mongodb+srv://" URIs. Defaults to "mongodb". Use it like so:: diff --git a/test/test_bson.py b/test/test_bson.py index b91bc7f5fb..eb4f4e47c2 100644 --- a/test/test_bson.py +++ b/test/test_bson.py @@ -994,57 +994,32 @@ def test_decode_all_defaults(self): def test_unicode_decode_error_handler(self): enc = encode({"keystr": "foobar"}) - # Test handling of bad key value. + # Test handling of bad key value, bad string value, and both. invalid_key = enc[:7] + b'\xe9' + enc[8:] - replaced_key = b'ke\xe9str'.decode('utf-8', 'replace') - ignored_key = b'ke\xe9str'.decode('utf-8', 'ignore') - - dec = decode(invalid_key, - CodecOptions(unicode_decode_error_handler="replace")) - self.assertEqual(dec, {replaced_key: "foobar"}) - - dec = decode(invalid_key, - CodecOptions(unicode_decode_error_handler="ignore")) - self.assertEqual(dec, {ignored_key: "foobar"}) - - self.assertRaises(InvalidBSON, decode, invalid_key, CodecOptions( - unicode_decode_error_handler="strict")) - self.assertRaises(InvalidBSON, decode, invalid_key, CodecOptions()) - self.assertRaises(InvalidBSON, decode, invalid_key) - - # Test handing of bad string value. - invalid_val = BSON(enc[:18] + b'\xe9' + enc[19:]) - replaced_val = b'fo\xe9bar'.decode('utf-8', 'replace') - ignored_val = b'fo\xe9bar'.decode('utf-8', 'ignore') - - dec = decode(invalid_val, - CodecOptions(unicode_decode_error_handler="replace")) - self.assertEqual(dec, {"keystr": replaced_val}) - - dec = decode(invalid_val, - CodecOptions(unicode_decode_error_handler="ignore")) - self.assertEqual(dec, {"keystr": ignored_val}) - - self.assertRaises(InvalidBSON, decode, invalid_val, CodecOptions( - unicode_decode_error_handler="strict")) - self.assertRaises(InvalidBSON, decode, invalid_val, CodecOptions()) - self.assertRaises(InvalidBSON, decode, invalid_val) - - # Test handing bad key + bad value. + invalid_val = enc[:18] + b'\xe9' + enc[19:] invalid_both = enc[:7] + b'\xe9' + enc[8:18] + b'\xe9' + enc[19:] - dec = decode(invalid_both, - CodecOptions(unicode_decode_error_handler="replace")) - self.assertEqual(dec, {replaced_key: replaced_val}) - - dec = decode(invalid_both, - CodecOptions(unicode_decode_error_handler="ignore")) - self.assertEqual(dec, {ignored_key: ignored_val}) - - self.assertRaises(InvalidBSON, decode, invalid_both, CodecOptions( - unicode_decode_error_handler="strict")) - self.assertRaises(InvalidBSON, decode, invalid_both, CodecOptions()) - self.assertRaises(InvalidBSON, decode, invalid_both) + # Ensure that strict mode raises an error. + for invalid in [invalid_key, invalid_val, invalid_both]: + self.assertRaises(InvalidBSON, decode, invalid, CodecOptions( + unicode_decode_error_handler="strict")) + self.assertRaises(InvalidBSON, decode, invalid, CodecOptions()) + self.assertRaises(InvalidBSON, decode, invalid) + + # Test all other error handlers. + for handler in ['replace', 'backslashreplace', 'surrogateescape', + 'ignore']: + expected_key = b'ke\xe9str'.decode('utf-8', handler) + expected_val = b'fo\xe9bar'.decode('utf-8', handler) + doc = decode(invalid_key, + CodecOptions(unicode_decode_error_handler=handler)) + self.assertEqual(doc, {expected_key: "foobar"}) + doc = decode(invalid_val, + CodecOptions(unicode_decode_error_handler=handler)) + self.assertEqual(doc, {"keystr": expected_val}) + doc = decode(invalid_both, + CodecOptions(unicode_decode_error_handler=handler)) + self.assertEqual(doc, {expected_key: expected_val}) # Test handling bad error mode. dec = decode(enc,