@@ -69,3 +69,81 @@ pub fn string_to_pat(source_str : @~str) -> @ast::pat {
69
69
// Turn a vector of strings into a vector of identifiers by handing each
// string, in order, to token::str_to_ident and collecting the results.
pub fn strs_to_idents(ids: ~[&str]) -> ~[ast::ident] {
    do ids.map |name| {
        token::str_to_ident(*name)
    }
}
72
+
73
+ // does the given string match the pattern? whitespace in the first string
74
+ // may be deleted or replaced with other whitespace to match the pattern.
75
+ // this function is unicode-ignorant; fortunately, the careful design of
76
+ // UTF-8 mitigates this ignorance. In particular, this function only collapses
77
+ // sequences of \n, \r, ' ', and \t, but it should otherwise tolerate unicode
78
+ // chars. Unsurprisingly, it doesn't do NKF-normalization(?).
79
+ pub fn matches_codepattern ( a : & str , b : & str ) -> bool {
80
+ let mut idx_a = 0 ;
81
+ let mut idx_b = 0 ;
82
+ loop {
83
+ if ( idx_a == a. len ( ) && idx_b == b. len ( ) ) {
84
+ return true ;
85
+ }
86
+ else if ( idx_a == a. len ( ) ) { return false ; }
87
+ else if ( idx_b == b. len ( ) ) {
88
+ // maybe the stuff left in a is all ws?
89
+ if ( is_whitespace ( a. char_at ( idx_a) ) ) {
90
+ return ( scan_for_non_ws_or_end ( a, idx_a) == a. len ( ) ) ;
91
+ } else {
92
+ return false ;
93
+ }
94
+ }
95
+ // ws in both given and pattern:
96
+ else if ( is_whitespace ( a. char_at ( idx_a) )
97
+ && is_whitespace ( b. char_at ( idx_b) ) ) {
98
+ idx_a = scan_for_non_ws_or_end ( a, idx_a) ;
99
+ idx_b = scan_for_non_ws_or_end ( b, idx_b) ;
100
+ }
101
+ // ws in given only:
102
+ else if ( is_whitespace ( a. char_at ( idx_a) ) ) {
103
+ idx_a = scan_for_non_ws_or_end ( a, idx_a) ;
104
+ }
105
+ // *don't* silently eat ws in expected only.
106
+ else if ( a. char_at ( idx_a) == b. char_at ( idx_b) ) {
107
+ idx_a += 1 ;
108
+ idx_b += 1 ;
109
+ }
110
+ else {
111
+ return false ;
112
+ }
113
+ }
114
+ }
115
+
116
+ // given a string and an index, return the first uint >= idx
117
+ // that is a non-ws-char or is outside of the legal range of
118
+ // the string.
119
+ fn scan_for_non_ws_or_end ( a : & str , idx : uint ) -> uint {
120
+ let mut i = idx;
121
+ let len = a. len ( ) ;
122
+ while ( ( i < len) && ( is_whitespace ( a. char_at ( i) ) ) ) {
123
+ i += 1 ;
124
+ }
125
+ i
126
+ }
127
+
128
// copied from lexer.
// True exactly for space, tab, carriage return and newline; every other
// char (including other Unicode whitespace) yields false.
pub fn is_whitespace(c: char) -> bool {
    match c {
        ' ' | '\t' | '\r' | '\n' => true,
        _ => false
    }
}
132
+
133
#[cfg(test)]
mod test {
    use super::*;

    // matches_codepattern: equality modulo whitespace in the first argument.
    #[test] fn eqmodws() {
        // empty strings and single chars
        assert!(matches_codepattern("", ""));
        assert!(!matches_codepattern("", "a"));
        assert!(!matches_codepattern("a", ""));
        assert!(matches_codepattern("a", "a"));

        // a ws run in the given string may stand for a different ws run
        assert!(matches_codepattern("a b", "a \n \t \r b"));
        // trailing ws in the given string is ignored ...
        assert!(matches_codepattern("a b ", "a \n \t \r b"));
        // ... but trailing ws in the pattern is not
        assert!(!matches_codepattern("a b", "a \n \t \r b "));
        assert!(matches_codepattern("a b", "a b"));

        // ws may be dropped from the given string, never from the pattern
        assert!(!matches_codepattern("ab", "a b"));
        assert!(matches_codepattern("a b", "ab"));
    }
}
0 commit comments