Skip to content

PYTHON-2361 Support parsing $uuid as extended JSON representation for subtype 4 binary #483

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bson/json_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,8 @@ def _parse_legacy_uuid(doc, json_options):
"""Decode a JSON legacy $uuid to Python UUID."""
if len(doc) != 1:
raise TypeError('Bad $uuid, extra field(s): %s' % (doc,))
if not isinstance(doc["$uuid"], text_type):
raise TypeError('$uuid must be a string: %s' % (doc,))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This error message uses the same format as the one for $binary base64.

if json_options.uuid_representation == UuidRepresentation.UNSPECIFIED:
return Binary.from_uuid(uuid.UUID(doc["$uuid"]))
else:
Expand Down
10 changes: 8 additions & 2 deletions test/bson_corpus/array.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,22 @@
"canonical_extjson": "{\"a\" : [{\"$numberInt\": \"10\"}]}"
},
{
"description": "Single Element Array with index set incorrectly",
"description": "Single Element Array with index set incorrectly to empty string",
"degenerate_bson": "130000000461000B00000010000A0000000000",
"canonical_bson": "140000000461000C0000001030000A0000000000",
"canonical_extjson": "{\"a\" : [{\"$numberInt\": \"10\"}]}"
},
{
"description": "Single Element Array with index set incorrectly",
"description": "Single Element Array with index set incorrectly to ab",
"degenerate_bson": "150000000461000D000000106162000A0000000000",
"canonical_bson": "140000000461000C0000001030000A0000000000",
"canonical_extjson": "{\"a\" : [{\"$numberInt\": \"10\"}]}"
},
{
"description": "Multi Element Array with duplicate indexes",
"degenerate_bson": "1b000000046100130000001030000a000000103000140000000000",
"canonical_bson": "1b000000046100130000001030000a000000103100140000000000",
"canonical_extjson": "{\"a\" : [{\"$numberInt\": \"10\"}, {\"$numberInt\": \"20\"}]}"
}
],
"decodeErrors": [
Expand Down
16 changes: 16 additions & 0 deletions test/bson_corpus/binary.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@
"canonical_bson": "1D000000057800100000000473FFD26444B34C6990E8E7D1DFC035D400",
"canonical_extjson": "{\"x\" : { \"$binary\" : {\"base64\" : \"c//SZESzTGmQ6OfR38A11A==\", \"subType\" : \"04\"}}}"
},
{
"description": "subtype 0x04 UUID",
"canonical_bson": "1D000000057800100000000473FFD26444B34C6990E8E7D1DFC035D400",
"canonical_extjson": "{\"x\" : { \"$binary\" : {\"base64\" : \"c//SZESzTGmQ6OfR38A11A==\", \"subType\" : \"04\"}}}",
"degenerate_extjson": "{\"x\" : { \"$uuid\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4\"}}"
},
{
"description": "subtype 0x05",
"canonical_bson": "1D000000057800100000000573FFD26444B34C6990E8E7D1DFC035D400",
Expand Down Expand Up @@ -81,5 +87,15 @@
"description": "subtype 0x02 length negative one",
"bson": "130000000578000600000002FFFFFFFFFFFF00"
}
],
"parseErrors": [
{
"description": "$uuid wrong type",
"string": "{\"x\" : { \"$uuid\" : { \"data\" : \"73ffd264-44b3-4c69-90e8-e7d1dfc035d4\"}}}"
},
{
"description": "$uuid invalid value",
"string": "{\"x\" : { \"$uuid\" : \"73ffd264-44b3-90e8-e7d1dfc035d4\"}}"
}
]
}
6 changes: 6 additions & 0 deletions test/bson_corpus/datetime.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
"description" : "Y10K",
"canonical_bson" : "1000000009610000DC1FD277E6000000",
"canonical_extjson" : "{\"a\":{\"$date\":{\"$numberLong\":\"253402300800000\"}}}"
},
{
"description": "leading zero ms",
"canonical_bson": "10000000096100D1D6D6CC3B01000000",
"relaxed_extjson": "{\"a\" : {\"$date\" : \"2012-12-24T12:15:30.001Z\"}}",
"canonical_extjson": "{\"a\" : {\"$date\" : {\"$numberLong\" : \"1356351330001\"}}}"
}
],
"decodeErrors": [
Expand Down
1 change: 1 addition & 0 deletions test/bson_corpus/decimal128-2.json
Original file line number Diff line number Diff line change
Expand Up @@ -790,3 +790,4 @@
}
]
}

1 change: 1 addition & 0 deletions test/bson_corpus/decimal128-5.json
Original file line number Diff line number Diff line change
Expand Up @@ -399,3 +399,4 @@
}
]
}

16 changes: 8 additions & 8 deletions test/bson_corpus/double.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,16 @@
"relaxed_extjson": "{\"d\" : -1.0001220703125}"
},
{
"description": "1.2345678901234568e+18",
"canonical_bson": "1000000001640081E97DF41022B14300",
"canonical_extjson": "{\"d\" : {\"$numberDouble\": \"1.2345678901234568e+18\"}}",
"relaxed_extjson": "{\"d\" : 1.2345678901234568E+18}"
"description": "1.2345678921232E+18",
"canonical_bson": "100000000164002a1bf5f41022b14300",
"canonical_extjson": "{\"d\" : {\"$numberDouble\": \"1.2345678921232e+18\"}}",
"relaxed_extjson": "{\"d\" : 1.2345678921232E+18}"
},
{
"description": "-1.2345678901234568e+18",
"canonical_bson": "1000000001640081E97DF41022B1C300",
"canonical_extjson": "{\"d\" : {\"$numberDouble\": \"-1.2345678901234568e+18\"}}",
"relaxed_extjson": "{\"d\" : -1.2345678901234568e+18}"
"description": "-1.2345678921232E+18",
"canonical_bson": "100000000164002a1bf5f41022b1c300",
"canonical_extjson": "{\"d\" : {\"$numberDouble\": \"-1.2345678921232e+18\"}}",
"relaxed_extjson": "{\"d\" : -1.2345678921232E+18}"
},
{
"description": "0.0",
Expand Down
9 changes: 5 additions & 4 deletions test/bson_corpus/multi-type-deprecated.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
"valid": [
{
"description": "All BSON types",
"canonical_bson": "3B020000075F69640057E193D7A9CC81B4027498B50E53796D626F6C000700000073796D626F6C0002537472696E670007000000737472696E670010496E743332002A00000012496E743634002A0000000000000001446F75626C6500000000000000F0BF0542696E617279001000000003A34C38F7C3ABEDC8A37814A992AB8DB60542696E61727955736572446566696E656400050000008001020304050D436F6465000E00000066756E6374696F6E2829207B7D000F436F64655769746853636F7065001B0000000E00000066756E6374696F6E2829207B7D00050000000003537562646F63756D656E74001200000002666F6F0004000000626172000004417272617900280000001030000100000010310002000000103200030000001033000400000010340005000000001154696D657374616D7000010000002A0000000B5265676578007061747465726E0000094461746574696D6545706F6368000000000000000000094461746574696D65506F73697469766500FFFFFF7F00000000094461746574696D654E656761746976650000000080FFFFFFFF085472756500010846616C736500000C4442506F696E746572000E00000064622E636F6C6C656374696F6E0057E193D7A9CC81B4027498B1034442526566003D0000000224726566000B000000636F6C6C656374696F6E00072469640057FD71E96E32AB4225B723FB02246462000900000064617461626173650000FF4D696E6B6579007F4D61786B6579000A4E756C6C0006556E646566696E65640000",
"converted_bson": "4b020000075f69640057e193d7a9cc81b4027498b50253796d626f6c000700000073796d626f6c0002537472696e670007000000737472696e670010496e743332002a00000012496e743634002a0000000000000001446f75626c6500000000000000f0bf0542696e617279001000000003a34c38f7c3abedc8a37814a992ab8db60542696e61727955736572446566696e656400050000008001020304050d436f6465000e00000066756e6374696f6e2829207b7d000f436f64655769746853636f7065001b0000000e00000066756e6374696f6e2829207b7d00050000000003537562646f63756d656e74001200000002666f6f0004000000626172000004417272617900280000001030000100000010310002000000103200030000001033000400000010340005000000001154696d657374616d7000010000002a0000000b5265676578007061747465726e0000094461746574696d6545706f6368000000000000000000094461746574696d65506f73697469766500ffffff7f00000000094461746574696d654e656761746976650000000080ffffffff085472756500010846616c73650000034442506f696e746572002e0000000224726566000e00000064622e636f6c6c656374696f6e00072469640057e193d7a9cc81b4027498b100034442526566003d0000000224726566000b000000636f6c6c656374696f6e00072469640057fd71e96e32ab4225b723fb02246462000900000064617461626173650000ff4d696e6b6579007f4d61786b6579000a4e756c6c000a556e646566696e65640000",
"canonical_extjson": "{\"_id\": {\"$oid\": \"57e193d7a9cc81b4027498b5\"}, \"Symbol\": {\"$symbol\": \"symbol\"}, \"String\": \"string\", \"Int32\": {\"$numberInt\": \"42\"}, \"Int64\": {\"$numberLong\": \"42\"}, \"Double\": {\"$numberDouble\": \"-1.0\"}, \"Binary\": { \"$binary\" : {\"base64\": \"o0w498Or7cijeBSpkquNtg==\", \"subType\": \"03\"}}, \"BinaryUserDefined\": { \"$binary\" : {\"base64\": \"AQIDBAU=\", \"subType\": \"80\"}}, \"Code\": {\"$code\": \"function() {}\"}, \"CodeWithScope\": {\"$code\": \"function() {}\", \"$scope\": {}}, \"Subdocument\": {\"foo\": \"bar\"}, \"Array\": [{\"$numberInt\": \"1\"}, {\"$numberInt\": \"2\"}, {\"$numberInt\": \"3\"}, {\"$numberInt\": \"4\"}, {\"$numberInt\": \"5\"}], \"Timestamp\": {\"$timestamp\": {\"t\": 42, \"i\": 1}}, \"Regex\": {\"$regularExpression\": {\"pattern\": \"pattern\", \"options\": \"\"}}, \"DatetimeEpoch\": {\"$date\": {\"$numberLong\": \"0\"}}, \"DatetimePositive\": {\"$date\": {\"$numberLong\": \"2147483647\"}}, \"DatetimeNegative\": {\"$date\": {\"$numberLong\": \"-2147483648\"}}, \"True\": true, \"False\": false, \"DBPointer\": {\"$dbPointer\": {\"$ref\": \"db.collection\", \"$id\": {\"$oid\": \"57e193d7a9cc81b4027498b1\"}}}, \"DBRef\": {\"$ref\": \"collection\", \"$id\": {\"$oid\": \"57fd71e96e32ab4225b723fb\"}, \"$db\": \"database\"}, \"Minkey\": {\"$minKey\": 1}, \"Maxkey\": {\"$maxKey\": 1}, \"Null\": null, \"Undefined\": {\"$undefined\": true}}",
"converted_extjson": "{\"_id\": {\"$oid\": \"57e193d7a9cc81b4027498b5\"}, \"Symbol\": \"symbol\", \"String\": \"string\", \"Int32\": {\"$numberInt\": \"42\"}, \"Int64\": {\"$numberLong\": \"42\"}, \"Double\": {\"$numberDouble\": \"-1.0\"}, \"Binary\": { \"$binary\" : {\"base64\": \"o0w498Or7cijeBSpkquNtg==\", \"subType\": \"03\"}}, \"BinaryUserDefined\": { \"$binary\" : {\"base64\": \"AQIDBAU=\", \"subType\": \"80\"}}, \"Code\": {\"$code\": \"function() {}\"}, \"CodeWithScope\": {\"$code\": \"function() {}\", \"$scope\": {}}, \"Subdocument\": {\"foo\": \"bar\"}, \"Array\": [{\"$numberInt\": \"1\"}, {\"$numberInt\": \"2\"}, {\"$numberInt\": \"3\"}, {\"$numberInt\": \"4\"}, {\"$numberInt\": \"5\"}], \"Timestamp\": {\"$timestamp\": {\"t\": 42, \"i\": 1}}, \"Regex\": {\"$regularExpression\": {\"pattern\": \"pattern\", \"options\": \"\"}}, \"DatetimeEpoch\": {\"$date\": {\"$numberLong\": \"0\"}}, \"DatetimePositive\": {\"$date\": {\"$numberLong\": \"2147483647\"}}, \"DatetimeNegative\": {\"$date\": {\"$numberLong\": \"-2147483648\"}}, \"True\": true, \"False\": false, \"DBPointer\": {\"$ref\": \"db.collection\", \"$id\": {\"$oid\": \"57e193d7a9cc81b4027498b1\"}}, \"DBRef\": {\"$ref\": \"collection\", \"$id\": {\"$oid\": \"57fd71e96e32ab4225b723fb\"}, \"$db\": \"database\"}, \"Minkey\": {\"$minKey\": 1}, \"Maxkey\": {\"$maxKey\": 1}, \"Null\": null, \"Undefined\": null}"
"canonical_bson": "38020000075F69640057E193D7A9CC81B4027498B50E53796D626F6C000700000073796D626F6C0002537472696E670007000000737472696E670010496E743332002A00000012496E743634002A0000000000000001446F75626C6500000000000000F0BF0542696E617279001000000003A34C38F7C3ABEDC8A37814A992AB8DB60542696E61727955736572446566696E656400050000008001020304050D436F6465000E00000066756E6374696F6E2829207B7D000F436F64655769746853636F7065001B0000000E00000066756E6374696F6E2829207B7D00050000000003537562646F63756D656E74001200000002666F6F0004000000626172000004417272617900280000001030000100000010310002000000103200030000001033000400000010340005000000001154696D657374616D7000010000002A0000000B5265676578007061747465726E0000094461746574696D6545706F6368000000000000000000094461746574696D65506F73697469766500FFFFFF7F00000000094461746574696D654E656761746976650000000080FFFFFFFF085472756500010846616C736500000C4442506F696E746572000B000000636F6C6C656374696F6E0057E193D7A9CC81B4027498B1034442526566003D0000000224726566000B000000636F6C6C656374696F6E00072469640057FD71E96E32AB4225B723FB02246462000900000064617461626173650000FF4D696E6B6579007F4D61786B6579000A4E756C6C0006556E646566696E65640000",
"converted_bson": "48020000075f69640057e193d7a9cc81b4027498b50253796d626f6c000700000073796d626f6c0002537472696e670007000000737472696e670010496e743332002a00000012496e743634002a0000000000000001446f75626c6500000000000000f0bf0542696e617279001000000003a34c38f7c3abedc8a37814a992ab8db60542696e61727955736572446566696e656400050000008001020304050d436f6465000e00000066756e6374696f6e2829207b7d000f436f64655769746853636f7065001b0000000e00000066756e6374696f6e2829207b7d00050000000003537562646f63756d656e74001200000002666f6f0004000000626172000004417272617900280000001030000100000010310002000000103200030000001033000400000010340005000000001154696d657374616d7000010000002a0000000b5265676578007061747465726e0000094461746574696d6545706f6368000000000000000000094461746574696d65506f73697469766500ffffff7f00000000094461746574696d654e656761746976650000000080ffffffff085472756500010846616c73650000034442506f696e746572002b0000000224726566000b000000636f6c6c656374696f6e00072469640057e193d7a9cc81b4027498b100034442526566003d0000000224726566000b000000636f6c6c656374696f6e00072469640057fd71e96e32ab4225b723fb02246462000900000064617461626173650000ff4d696e6b6579007f4d61786b6579000a4e756c6c000a556e646566696e65640000",
"canonical_extjson": "{\"_id\": {\"$oid\": \"57e193d7a9cc81b4027498b5\"}, \"Symbol\": {\"$symbol\": \"symbol\"}, \"String\": \"string\", \"Int32\": {\"$numberInt\": \"42\"}, \"Int64\": {\"$numberLong\": \"42\"}, \"Double\": {\"$numberDouble\": \"-1.0\"}, \"Binary\": { \"$binary\" : {\"base64\": \"o0w498Or7cijeBSpkquNtg==\", \"subType\": \"03\"}}, \"BinaryUserDefined\": { \"$binary\" : {\"base64\": \"AQIDBAU=\", \"subType\": \"80\"}}, \"Code\": {\"$code\": \"function() {}\"}, \"CodeWithScope\": {\"$code\": \"function() {}\", \"$scope\": {}}, \"Subdocument\": {\"foo\": \"bar\"}, \"Array\": [{\"$numberInt\": \"1\"}, {\"$numberInt\": \"2\"}, {\"$numberInt\": \"3\"}, {\"$numberInt\": \"4\"}, {\"$numberInt\": \"5\"}], \"Timestamp\": {\"$timestamp\": {\"t\": 42, \"i\": 1}}, \"Regex\": {\"$regularExpression\": {\"pattern\": \"pattern\", \"options\": \"\"}}, \"DatetimeEpoch\": {\"$date\": {\"$numberLong\": \"0\"}}, \"DatetimePositive\": {\"$date\": {\"$numberLong\": \"2147483647\"}}, \"DatetimeNegative\": {\"$date\": {\"$numberLong\": \"-2147483648\"}}, \"True\": true, \"False\": false, \"DBPointer\": {\"$dbPointer\": {\"$ref\": \"collection\", \"$id\": {\"$oid\": \"57e193d7a9cc81b4027498b1\"}}}, \"DBRef\": {\"$ref\": \"collection\", \"$id\": {\"$oid\": \"57fd71e96e32ab4225b723fb\"}, \"$db\": \"database\"}, \"Minkey\": {\"$minKey\": 1}, \"Maxkey\": {\"$maxKey\": 1}, \"Null\": null, \"Undefined\": {\"$undefined\": true}}",
"converted_extjson": "{\"_id\": {\"$oid\": \"57e193d7a9cc81b4027498b5\"}, \"Symbol\": \"symbol\", \"String\": \"string\", \"Int32\": {\"$numberInt\": \"42\"}, \"Int64\": {\"$numberLong\": \"42\"}, \"Double\": {\"$numberDouble\": \"-1.0\"}, \"Binary\": { \"$binary\" : {\"base64\": \"o0w498Or7cijeBSpkquNtg==\", \"subType\": \"03\"}}, \"BinaryUserDefined\": { \"$binary\" : {\"base64\": \"AQIDBAU=\", \"subType\": \"80\"}}, \"Code\": {\"$code\": \"function() {}\"}, \"CodeWithScope\": {\"$code\": \"function() {}\", \"$scope\": {}}, \"Subdocument\": {\"foo\": \"bar\"}, \"Array\": [{\"$numberInt\": \"1\"}, {\"$numberInt\": \"2\"}, {\"$numberInt\": \"3\"}, {\"$numberInt\": \"4\"}, {\"$numberInt\": \"5\"}], \"Timestamp\": {\"$timestamp\": {\"t\": 42, \"i\": 1}}, \"Regex\": {\"$regularExpression\": {\"pattern\": \"pattern\", \"options\": \"\"}}, \"DatetimeEpoch\": {\"$date\": {\"$numberLong\": \"0\"}}, \"DatetimePositive\": {\"$date\": {\"$numberLong\": \"2147483647\"}}, \"DatetimeNegative\": {\"$date\": {\"$numberLong\": \"-2147483648\"}}, \"True\": true, \"False\": false, \"DBPointer\": {\"$ref\": \"collection\", \"$id\": {\"$oid\": \"57e193d7a9cc81b4027498b1\"}}, \"DBRef\": {\"$ref\": \"collection\", \"$id\": {\"$oid\": \"57fd71e96e32ab4225b723fb\"}, \"$db\": \"database\"}, \"Minkey\": {\"$minKey\": 1}, \"Maxkey\": {\"$maxKey\": 1}, \"Null\": null, \"Undefined\": null}"
}
]
}

5 changes: 5 additions & 0 deletions test/bson_corpus/timestamp.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
"description": "Timestamp with high-order bit set on both seconds and increment",
"canonical_bson": "10000000116100FFFFFFFFFFFFFFFF00",
"canonical_extjson": "{\"a\" : {\"$timestamp\" : {\"t\" : 4294967295, \"i\" : 4294967295} } }"
},
{
"description": "Timestamp with high-order bit set on both seconds and increment (not UINT32_MAX)",
"canonical_bson": "1000000011610000286BEE00286BEE00",
"canonical_extjson": "{\"a\" : {\"$timestamp\" : {\"t\" : 4000000000, \"i\" : 4000000000} } }"
}
],
"decodeErrors": [
Expand Down
10 changes: 7 additions & 3 deletions test/bson_corpus/top.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,11 @@
"parseErrors": [
{
"description" : "Bad $regularExpression (extra field)",
"string" : "{\"a\" : \"$regularExpression\": {\"pattern\": \"abc\", \"options\": \"\", \"unrelated\": true}}}"
"string" : "{\"a\" : {\"$regularExpression\": {\"pattern\": \"abc\", \"options\": \"\", \"unrelated\": true}}}"
},
{
"description" : "Bad $regularExpression (missing options field)",
"string" : "{\"a\" : \"$regularExpression\": {\"pattern\": \"abc\"}}}"
"string" : "{\"a\" : {\"$regularExpression\": {\"pattern\": \"abc\"}}}"
},
{
"description": "Bad $regularExpression (pattern is number, not string)",
Expand All @@ -85,7 +85,7 @@
},
{
"description" : "Bad $regularExpression (missing pattern field)",
"string" : "{\"a\" : \"$regularExpression\": {\"options\":\"ix\"}}}"
"string" : "{\"a\" : {\"$regularExpression\": {\"options\":\"ix\"}}}"
},
{
"description": "Bad $oid (number, not string)",
Expand Down Expand Up @@ -151,6 +151,10 @@
"description": "Bad $code (type is number, not string)",
"string": "{\"a\" : {\"$code\" : 42}}"
},
{
"description": "Bad $code (type is number, not string) when $scope is also present",
"string": "{\"a\" : {\"$code\" : 42, \"$scope\" : {}}}"
},
{
"description": "Bad $code (extra field)",
"string": "{\"a\" : {\"$code\" : \"\", \"unrelated\": true}}"
Expand Down
9 changes: 8 additions & 1 deletion test/test_bson_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def run_test(self):
continue

# Special case for testing encoding UUID as binary subtype 0x04.
if description == 'subtype 0x04':
if description.startswith('subtype 0x04'):
encode_extjson = to_extjson_uuid_04
encode_bson = to_bson_uuid_04
else:
Expand Down Expand Up @@ -203,6 +203,13 @@ def run_test(self):
'case: ' + description)
except (ValueError, KeyError, TypeError, InvalidId):
pass
elif bson_type == '0x05':
try:
decode_extjson(parse_error_case['string'])
raise AssertionError('exception not raised for test '
'case: ' + description)
except (TypeError, ValueError):
pass
else:
raise AssertionError('cannot test parseErrors for type ' +
bson_type)
Expand Down