@@ -115,6 +115,10 @@ impl Sub for CharPos {
115
115
/// are *absolute* positions from the beginning of the codemap, not positions
116
116
/// relative to FileMaps. Methods on the CodeMap can be used to relate spans back
117
117
/// to the original source.
118
+ /// You must be careful if the span crosses more than one file - you will not be
119
+ /// able to use many of the functions on spans in codemap and you cannot assume
120
+ /// that the length of the span = hi - lo; there may be space in the BytePos
121
+ /// range between files.
118
122
#[ derive( Clone , Copy , Hash ) ]
119
123
pub struct Span {
120
124
pub lo : BytePos ,
@@ -339,7 +343,7 @@ pub struct MultiByteChar {
339
343
pub bytes : usize ,
340
344
}
341
345
342
- /// A single source in the CodeMap
346
+ /// A single source in the CodeMap.
343
347
pub struct FileMap {
344
348
/// The name of the file that the source came from, source that doesn't
345
349
/// originate from files has names between angle brackets by convention,
@@ -508,6 +512,9 @@ impl FileMap {
508
512
lines. get ( line_number) . map ( |& line| {
509
513
let begin: BytePos = line - self . start_pos ;
510
514
let begin = begin. to_usize ( ) ;
515
+ // We can't use `lines.get(line_number+1)` because we might
516
+ // be parsing when we call this function and thus the current
517
+ // line is the last one we have line info for.
511
518
let slice = & src[ begin..] ;
512
519
match slice. find ( '\n' ) {
513
520
Some ( e) => & slice[ ..e] ,
@@ -598,27 +605,27 @@ impl CodeMap {
598
605
Ok ( self . new_filemap ( path. to_str ( ) . unwrap ( ) . to_string ( ) , src) )
599
606
}
600
607
608
+ fn next_start_pos ( & self ) -> usize {
609
+ let files = self . files . borrow ( ) ;
610
+ match files. last ( ) {
611
+ None => 0 ,
612
+ // Add one so there is some space between files. This lets us distinguish
613
+ // positions in the codemap, even in the presence of zero-length files.
614
+ Some ( last) => last. end_pos . to_usize ( ) + 1 ,
615
+ }
616
+ }
617
+
618
+ /// Creates a new filemap without setting its line information. If you don't
619
+ /// intend to set the line information yourself, you should use new_filemap_and_lines.
601
620
pub fn new_filemap ( & self , filename : FileName , mut src : String ) -> Rc < FileMap > {
621
+ let start_pos = self . next_start_pos ( ) ;
602
622
let mut files = self . files . borrow_mut ( ) ;
603
- let start_pos = match files. last ( ) {
604
- None => 0 ,
605
- Some ( last) => last. end_pos . to_usize ( ) ,
606
- } ;
607
623
608
624
// Remove utf-8 BOM if any.
609
625
if src. starts_with ( "\u{feff} " ) {
610
626
src. drain ( ..3 ) ;
611
627
}
612
628
613
- // Append '\n' in case it's not already there.
614
- // This is a workaround to prevent CodeMap.lookup_filemap_idx from
615
- // accidentally overflowing into the next filemap in case the last byte
616
- // of span is also the last byte of filemap, which leads to incorrect
617
- // results from CodeMap.span_to_*.
618
- if !src. is_empty ( ) && !src. ends_with ( "\n " ) {
619
- src. push ( '\n' ) ;
620
- }
621
-
622
629
let end_pos = start_pos + src. len ( ) ;
623
630
624
631
let filemap = Rc :: new ( FileMap {
@@ -645,11 +652,8 @@ impl CodeMap {
645
652
mut file_local_lines : Vec < BytePos > ,
646
653
mut file_local_multibyte_chars : Vec < MultiByteChar > )
647
654
-> Rc < FileMap > {
655
+ let start_pos = self . next_start_pos ( ) ;
648
656
let mut files = self . files . borrow_mut ( ) ;
649
- let start_pos = match files. last ( ) {
650
- None => 0 ,
651
- Some ( last) => last. end_pos . to_usize ( ) ,
652
- } ;
653
657
654
658
let end_pos = Pos :: from_usize ( start_pos + source_len) ;
655
659
let start_pos = Pos :: from_usize ( start_pos) ;
@@ -686,39 +690,61 @@ impl CodeMap {
686
690
687
691
/// Lookup source information about a BytePos
688
692
pub fn lookup_char_pos ( & self , pos : BytePos ) -> Loc {
689
- let FileMapAndLine { fm : f, line : a} = self . lookup_line ( pos) ;
690
- let line = a + 1 ; // Line numbers start at 1
691
693
let chpos = self . bytepos_to_file_charpos ( pos) ;
692
- let linebpos = ( * f. lines . borrow ( ) ) [ a] ;
693
- let linechpos = self . bytepos_to_file_charpos ( linebpos) ;
694
- debug ! ( "byte pos {:?} is on the line at byte pos {:?}" ,
695
- pos, linebpos) ;
696
- debug ! ( "char pos {:?} is on the line at char pos {:?}" ,
697
- chpos, linechpos) ;
698
- debug ! ( "byte is on line: {}" , line) ;
699
- assert ! ( chpos >= linechpos) ;
700
- Loc {
701
- file : f,
702
- line : line,
703
- col : chpos - linechpos
694
+ match self . lookup_line ( pos) {
695
+ Ok ( FileMapAndLine { fm : f, line : a } ) => {
696
+ let line = a + 1 ; // Line numbers start at 1
697
+ let linebpos = ( * f. lines . borrow ( ) ) [ a] ;
698
+ let linechpos = self . bytepos_to_file_charpos ( linebpos) ;
699
+ debug ! ( "byte pos {:?} is on the line at byte pos {:?}" ,
700
+ pos, linebpos) ;
701
+ debug ! ( "char pos {:?} is on the line at char pos {:?}" ,
702
+ chpos, linechpos) ;
703
+ debug ! ( "byte is on line: {}" , line) ;
704
+ assert ! ( chpos >= linechpos) ;
705
+ Loc {
706
+ file : f,
707
+ line : line,
708
+ col : chpos - linechpos,
709
+ }
710
+ }
711
+ Err ( f) => {
712
+ Loc {
713
+ file : f,
714
+ line : 0 ,
715
+ col : chpos,
716
+ }
717
+ }
704
718
}
705
719
}
706
720
707
- fn lookup_line ( & self , pos : BytePos ) -> FileMapAndLine {
721
+ // If the relevant filemap is empty, we don't return a line number.
722
+ fn lookup_line ( & self , pos : BytePos ) -> Result < FileMapAndLine , Rc < FileMap > > {
708
723
let idx = self . lookup_filemap_idx ( pos) ;
709
724
710
725
let files = self . files . borrow ( ) ;
711
726
let f = ( * files) [ idx] . clone ( ) ;
727
+
728
+ let len = f. lines . borrow ( ) . len ( ) ;
729
+ if len == 0 {
730
+ return Err ( f) ;
731
+ }
732
+
712
733
let mut a = 0 ;
713
734
{
714
735
let lines = f. lines . borrow ( ) ;
715
736
let mut b = lines. len ( ) ;
716
737
while b - a > 1 {
717
738
let m = ( a + b) / 2 ;
718
- if ( * lines) [ m] > pos { b = m; } else { a = m; }
739
+ if ( * lines) [ m] > pos {
740
+ b = m;
741
+ } else {
742
+ a = m;
743
+ }
719
744
}
745
+ assert ! ( a <= lines. len( ) ) ;
720
746
}
721
- FileMapAndLine { fm : f, line : a}
747
+ Ok ( FileMapAndLine { fm : f, line : a } )
722
748
}
723
749
724
750
pub fn lookup_char_pos_adj ( & self , pos : BytePos ) -> LocWithOpt {
@@ -880,12 +906,15 @@ impl CodeMap {
880
906
CharPos ( bpos. to_usize ( ) - map. start_pos . to_usize ( ) - total_extra_bytes)
881
907
}
882
908
909
+ // Return the index of the filemap (in self.files) which contains pos.
883
910
fn lookup_filemap_idx ( & self , pos : BytePos ) -> usize {
884
911
let files = self . files . borrow ( ) ;
885
912
let files = & * files;
886
- let len = files. len ( ) ;
913
+ let count = files. len ( ) ;
914
+
915
+ // Binary search for the filemap.
887
916
let mut a = 0 ;
888
- let mut b = len ;
917
+ let mut b = count ;
889
918
while b - a > 1 {
890
919
let m = ( a + b) / 2 ;
891
920
if files[ m] . start_pos > pos {
@@ -894,26 +923,8 @@ impl CodeMap {
894
923
a = m;
895
924
}
896
925
}
897
- // There can be filemaps with length 0. These have the same start_pos as
898
- // the previous filemap, but are not the filemaps we want (because they
899
- // are length 0, they cannot contain what we are looking for). So,
900
- // rewind until we find a useful filemap.
901
- loop {
902
- let lines = files[ a] . lines . borrow ( ) ;
903
- let lines = lines;
904
- if !lines. is_empty ( ) {
905
- break ;
906
- }
907
- if a == 0 {
908
- panic ! ( "position {} does not resolve to a source location" ,
909
- pos. to_usize( ) ) ;
910
- }
911
- a -= 1 ;
912
- }
913
- if a >= len {
914
- panic ! ( "position {} does not resolve to a source location" ,
915
- pos. to_usize( ) )
916
- }
926
+
927
+ assert ! ( a < count, "position {} does not resolve to a source location" , pos. to_usize( ) ) ;
917
928
918
929
return a;
919
930
}
@@ -1027,10 +1038,13 @@ mod tests {
1027
1038
let fm = cm. new_filemap ( "blork.rs" . to_string ( ) ,
1028
1039
"first line.\n second line" . to_string ( ) ) ;
1029
1040
fm. next_line ( BytePos ( 0 ) ) ;
1041
+ // Test we can get lines with partial line info.
1030
1042
assert_eq ! ( fm. get_line( 0 ) , Some ( "first line." ) ) ;
1031
- // TESTING BROKEN BEHAVIOR:
1043
+ // TESTING BROKEN BEHAVIOR: line break declared before actual line break.
1032
1044
fm. next_line ( BytePos ( 10 ) ) ;
1033
1045
assert_eq ! ( fm. get_line( 1 ) , Some ( "." ) ) ;
1046
+ fm. next_line ( BytePos ( 12 ) ) ;
1047
+ assert_eq ! ( fm. get_line( 2 ) , Some ( "second line" ) ) ;
1034
1048
}
1035
1049
1036
1050
#[ test]
@@ -1056,9 +1070,9 @@ mod tests {
1056
1070
1057
1071
fm1. next_line ( BytePos ( 0 ) ) ;
1058
1072
fm1. next_line ( BytePos ( 12 ) ) ;
1059
- fm2. next_line ( BytePos ( 24 ) ) ;
1060
- fm3. next_line ( BytePos ( 24 ) ) ;
1061
- fm3. next_line ( BytePos ( 34 ) ) ;
1073
+ fm2. next_line ( fm2 . start_pos ) ;
1074
+ fm3. next_line ( fm3 . start_pos ) ;
1075
+ fm3. next_line ( fm3 . start_pos + BytePos ( 12 ) ) ;
1062
1076
1063
1077
cm
1064
1078
}
@@ -1068,11 +1082,15 @@ mod tests {
1068
1082
// Test lookup_byte_offset
1069
1083
let cm = init_code_map ( ) ;
1070
1084
1071
- let fmabp1 = cm. lookup_byte_offset ( BytePos ( 22 ) ) ;
1085
+ let fmabp1 = cm. lookup_byte_offset ( BytePos ( 23 ) ) ;
1072
1086
assert_eq ! ( fmabp1. fm. name, "blork.rs" ) ;
1073
- assert_eq ! ( fmabp1. pos, BytePos ( 22 ) ) ;
1087
+ assert_eq ! ( fmabp1. pos, BytePos ( 23 ) ) ;
1088
+
1089
+ let fmabp1 = cm. lookup_byte_offset ( BytePos ( 24 ) ) ;
1090
+ assert_eq ! ( fmabp1. fm. name, "empty.rs" ) ;
1091
+ assert_eq ! ( fmabp1. pos, BytePos ( 0 ) ) ;
1074
1092
1075
- let fmabp2 = cm. lookup_byte_offset ( BytePos ( 24 ) ) ;
1093
+ let fmabp2 = cm. lookup_byte_offset ( BytePos ( 25 ) ) ;
1076
1094
assert_eq ! ( fmabp2. fm. name, "blork2.rs" ) ;
1077
1095
assert_eq ! ( fmabp2. pos, BytePos ( 0 ) ) ;
1078
1096
}
@@ -1085,7 +1103,7 @@ mod tests {
1085
1103
let cp1 = cm. bytepos_to_file_charpos ( BytePos ( 22 ) ) ;
1086
1104
assert_eq ! ( cp1, CharPos ( 22 ) ) ;
1087
1105
1088
- let cp2 = cm. bytepos_to_file_charpos ( BytePos ( 24 ) ) ;
1106
+ let cp2 = cm. bytepos_to_file_charpos ( BytePos ( 25 ) ) ;
1089
1107
assert_eq ! ( cp2, CharPos ( 0 ) ) ;
1090
1108
}
1091
1109
@@ -1099,7 +1117,7 @@ mod tests {
1099
1117
assert_eq ! ( loc1. line, 2 ) ;
1100
1118
assert_eq ! ( loc1. col, CharPos ( 10 ) ) ;
1101
1119
1102
- let loc2 = cm. lookup_char_pos ( BytePos ( 24 ) ) ;
1120
+ let loc2 = cm. lookup_char_pos ( BytePos ( 25 ) ) ;
1103
1121
assert_eq ! ( loc2. file. name, "blork2.rs" ) ;
1104
1122
assert_eq ! ( loc2. line, 1 ) ;
1105
1123
assert_eq ! ( loc2. col, CharPos ( 0 ) ) ;
@@ -1115,18 +1133,18 @@ mod tests {
1115
1133
"first line€€.\n € second line" . to_string ( ) ) ;
1116
1134
1117
1135
fm1. next_line ( BytePos ( 0 ) ) ;
1118
- fm1. next_line ( BytePos ( 22 ) ) ;
1119
- fm2. next_line ( BytePos ( 40 ) ) ;
1120
- fm2. next_line ( BytePos ( 58 ) ) ;
1136
+ fm1. next_line ( BytePos ( 28 ) ) ;
1137
+ fm2. next_line ( fm2 . start_pos ) ;
1138
+ fm2. next_line ( fm2 . start_pos + BytePos ( 20 ) ) ;
1121
1139
1122
1140
fm1. record_multibyte_char ( BytePos ( 3 ) , 3 ) ;
1123
1141
fm1. record_multibyte_char ( BytePos ( 9 ) , 3 ) ;
1124
1142
fm1. record_multibyte_char ( BytePos ( 12 ) , 3 ) ;
1125
1143
fm1. record_multibyte_char ( BytePos ( 15 ) , 3 ) ;
1126
1144
fm1. record_multibyte_char ( BytePos ( 18 ) , 3 ) ;
1127
- fm2. record_multibyte_char ( BytePos ( 50 ) , 3 ) ;
1128
- fm2. record_multibyte_char ( BytePos ( 53 ) , 3 ) ;
1129
- fm2. record_multibyte_char ( BytePos ( 58 ) , 3 ) ;
1145
+ fm2. record_multibyte_char ( fm2 . start_pos + BytePos ( 10 ) , 3 ) ;
1146
+ fm2. record_multibyte_char ( fm2 . start_pos + BytePos ( 13 ) , 3 ) ;
1147
+ fm2. record_multibyte_char ( fm2 . start_pos + BytePos ( 18 ) , 3 ) ;
1130
1148
1131
1149
cm
1132
1150
}
0 commit comments