@@ -898,7 +898,7 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
898
898
validate : bool ,
899
899
scratch : & mut Vec < u8 > ,
900
900
) -> Result < ( ) > {
901
- let n = tri ! ( read. decode_hex_escape( ) ) ;
901
+ let mut n = tri ! ( read. decode_hex_escape( ) ) ;
902
902
903
903
// Non-BMP characters are encoded as a sequence of two hex
904
904
// escapes, representing UTF-16 surrogates. If deserializing a
@@ -909,56 +909,64 @@ fn parse_unicode_escape<'de, R: Read<'de>>(
909
909
return error ( read, ErrorCode :: LoneLeadingSurrogateInHexEscape ) ;
910
910
}
911
911
912
- if n < 0xD800 || n > 0xDBFF {
913
- // Every u16 outside of the surrogate ranges is guaranteed to be a
914
- // legal char.
915
- push_wtf8_codepoint ( n as u32 , scratch) ;
916
- return Ok ( ( ) ) ;
917
- }
912
+ loop {
913
+ if n < 0xD800 || n > 0xDBFF {
914
+ // Every u16 outside of the surrogate ranges is guaranteed to be a
915
+ // legal char.
916
+ push_wtf8_codepoint ( n as u32 , scratch) ;
917
+ return Ok ( ( ) ) ;
918
+ }
918
919
919
- // n is a leading surrogate, we now expect a trailing surrogate.
920
- let n1 = n;
920
+ // n is a leading surrogate, we now expect a trailing surrogate.
921
+ let n1 = n;
921
922
922
- if tri ! ( peek_or_eof( read) ) == b'\\' {
923
- read. discard ( ) ;
924
- } else {
925
- return if validate {
923
+ if tri ! ( peek_or_eof( read) ) == b'\\' {
926
924
read. discard ( ) ;
927
- error ( read, ErrorCode :: UnexpectedEndOfHexEscape )
928
925
} else {
929
- push_wtf8_codepoint ( n1 as u32 , scratch) ;
930
- Ok ( ( ) )
931
- } ;
932
- }
926
+ return if validate {
927
+ read. discard ( ) ;
928
+ error ( read, ErrorCode :: UnexpectedEndOfHexEscape )
929
+ } else {
930
+ push_wtf8_codepoint ( n1 as u32 , scratch) ;
931
+ Ok ( ( ) )
932
+ } ;
933
+ }
933
934
934
- if tri ! ( peek_or_eof( read) ) == b'u' {
935
- read. discard ( ) ;
936
- } else {
937
- return if validate {
935
+ if tri ! ( peek_or_eof( read) ) == b'u' {
938
936
read. discard ( ) ;
939
- error ( read, ErrorCode :: UnexpectedEndOfHexEscape )
940
937
} else {
941
- push_wtf8_codepoint ( n1 as u32 , scratch) ;
942
- // The \ prior to this byte started an escape sequence,
943
- // so we need to parse that now. This recursive call
944
- // does not blow the stack on malicious input because
945
- // the escape is not \u, so it will be handled by one
946
- // of the easy nonrecursive cases.
947
- parse_escape ( read, validate, scratch)
948
- } ;
949
- }
938
+ return if validate {
939
+ read. discard ( ) ;
940
+ error ( read, ErrorCode :: UnexpectedEndOfHexEscape )
941
+ } else {
942
+ push_wtf8_codepoint ( n1 as u32 , scratch) ;
943
+ // The \ prior to this byte started an escape sequence,
944
+ // so we need to parse that now. This recursive call
945
+ // does not blow the stack on malicious input because
946
+ // the escape is not \u, so it will be handled by one
947
+ // of the easy nonrecursive cases.
948
+ parse_escape ( read, validate, scratch)
949
+ } ;
950
+ }
950
951
951
- let n2 = tri ! ( read. decode_hex_escape( ) ) ;
952
+ let n2 = tri ! ( read. decode_hex_escape( ) ) ;
952
953
953
- if n2 < 0xDC00 || n2 > 0xDFFF {
954
- return error ( read, ErrorCode :: LoneLeadingSurrogateInHexEscape ) ;
955
- }
954
+ if n2 < 0xDC00 || n2 > 0xDFFF {
955
+ if validate {
956
+ return error ( read, ErrorCode :: LoneLeadingSurrogateInHexEscape ) ;
957
+ }
958
+ push_wtf8_codepoint ( n1 as u32 , scratch) ;
959
+ // If n2 is a leading surrogate, we need to restart.
960
+ n = n2;
961
+ continue ;
962
+ }
956
963
957
- // This value is in range U+10000..=U+10FFFF, which is always a
958
- // valid codepoint.
959
- let n = ( ( ( n1 - 0xD800 ) as u32 ) << 10 | ( n2 - 0xDC00 ) as u32 ) + 0x1_0000 ;
960
- push_wtf8_codepoint ( n, scratch) ;
961
- Ok ( ( ) )
964
+ // This value is in range U+10000..=U+10FFFF, which is always a
965
+ // valid codepoint.
966
+ let n = ( ( ( n1 - 0xD800 ) as u32 ) << 10 | ( n2 - 0xDC00 ) as u32 ) + 0x1_0000 ;
967
+ push_wtf8_codepoint ( n, scratch) ;
968
+ return Ok ( ( ) ) ;
969
+ }
962
970
}
963
971
964
972
/// Adds a WTF-8 codepoint to the end of the buffer. This is a more efficient
0 commit comments