@@ -31,13 +31,12 @@ use util::atom::Atom;
31
31
use util:: smallcharset:: SmallCharSet ;
32
32
33
33
use core:: mem:: replace;
34
- use core:: iter:: AdditiveIterator ;
35
34
use core:: default:: Default ;
36
35
use alloc:: boxed:: Box ;
37
36
use collections:: { MutableSeq , MutableMap } ;
38
37
use collections:: vec:: Vec ;
39
38
use collections:: string:: String ;
40
- use collections:: str:: { MaybeOwned , Slice , Owned } ;
39
+ use collections:: str:: { MaybeOwned , Slice } ;
41
40
use collections:: treemap:: TreeMap ;
42
41
43
42
pub mod states;
@@ -176,6 +175,10 @@ pub struct Tokenizer<'sink, Sink> {
176
175
impl < ' sink , Sink : TokenSink > Tokenizer < ' sink , Sink > {
177
176
/// Create a new tokenizer which feeds tokens to a particular `TokenSink`.
178
177
pub fn new ( sink : & ' sink mut Sink , mut opts : TokenizerOpts ) -> Tokenizer < ' sink , Sink > {
178
+ if opts. profile && cfg ! ( for_c) {
179
+ fail ! ( "Can't profile tokenizer when built as a C library" ) ;
180
+ }
181
+
179
182
let start_tag_name = opts. last_start_tag_name . take ( ) . map ( |s| Atom :: from_buf ( s) ) ;
180
183
let state = * opts. initial_state . as_ref ( ) . unwrap_or ( & states:: Data ) ;
181
184
let discard_bom = opts. discard_bom ;
@@ -253,11 +256,13 @@ impl<'sink, Sink: TokenSink> Tokenizer<'sink, Sink> {
253
256
n if ( n & 0xFFFE ) == 0xFFFE => true ,
254
257
_ => false ,
255
258
} {
256
- let msg = Owned ( format ! ( "Bad character {:?}" , c) ) ;
259
+ // format_if!(true) will still use the static error when built for C.
260
+ let msg = format_if ! ( true , "Bad character" ,
261
+ "Bad character {:?}" , c) ;
257
262
self . emit_error ( msg) ;
258
263
}
259
264
260
- debug ! ( "got character {:?}" , c) ;
265
+ h5e_debug ! ( "got character {:?}" , c) ;
261
266
self . current_char = c;
262
267
Some ( c)
263
268
}
@@ -284,7 +289,7 @@ impl<'sink, Sink: TokenSink> Tokenizer<'sink, Sink> {
284
289
}
285
290
286
291
let d = self . input_buffers . pop_except_from ( set) ;
287
- debug ! ( "got characters {}" , d) ;
292
+ h5e_debug ! ( "got characters {}" , d) ;
288
293
match d {
289
294
Some ( FromSet ( c) ) => self . get_preprocessed_char ( c) . map ( |x| FromSet ( x) ) ,
290
295
@@ -306,21 +311,21 @@ impl<'sink, Sink: TokenSink> Tokenizer<'sink, Sink> {
306
311
fn lookahead_and_consume ( & mut self , n : uint , p: |& str| -> bool) -> Option < bool > {
307
312
match self . input_buffers . pop_front ( n) {
308
313
None if self . at_eof => {
309
- debug ! ( "lookahead: requested {:u} characters not available and never will be" , n) ;
314
+ h5e_debug ! ( "lookahead: requested {:u} characters not available and never will be" , n) ;
310
315
Some ( false )
311
316
}
312
317
None => {
313
- debug ! ( "lookahead: requested {:u} characters not available" , n) ;
318
+ h5e_debug ! ( "lookahead: requested {:u} characters not available" , n) ;
314
319
self . wait_for = Some ( n) ;
315
320
None
316
321
}
317
322
Some ( s) => {
318
323
if p ( s. as_slice ( ) ) {
319
- debug ! ( "lookahead: condition satisfied by {:?}" , s) ;
324
+ h5e_debug ! ( "lookahead: condition satisfied by {:?}" , s) ;
320
325
// FIXME: set current input character?
321
326
Some ( true )
322
327
} else {
323
- debug ! ( "lookahead: condition not satisfied by {:?}" , s) ;
328
+ h5e_debug ! ( "lookahead: condition not satisfied by {:?}" , s) ;
324
329
self . unconsume ( s) ;
325
330
Some ( false )
326
331
}
@@ -569,7 +574,7 @@ macro_rules! shorthand (
569
574
// so it's behind a cfg flag.
570
575
#[ cfg( trace_tokenizer) ]
571
576
macro_rules! sh_trace ( ( $me: expr : $( $cmds: tt) * ) => ( {
572
- debug !( " {:s}" , stringify!( $( $cmds) * ) ) ;
577
+ h5e_debug !( " {:s}" , stringify!( $( $cmds) * ) ) ;
573
578
shorthand!( $me: expr : $( $cmds) * ) ;
574
579
} ) )
575
580
@@ -654,17 +659,17 @@ impl<'sink, Sink: TokenSink> Tokenizer<'sink, Sink> {
654
659
655
660
match self . wait_for {
656
661
Some ( n) if !self . input_buffers . has ( n) => {
657
- debug ! ( "lookahead: requested {:u} characters still not available" , n) ;
662
+ h5e_debug ! ( "lookahead: requested {:u} characters still not available" , n) ;
658
663
return false ;
659
664
}
660
- Some ( n ) => {
661
- debug ! ( "lookahead: requested {:u} characters become available" , n ) ;
665
+ Some ( _n ) => {
666
+ h5e_debug ! ( "lookahead: requested {:u} characters become available" , _n ) ;
662
667
self . wait_for = None ;
663
668
}
664
669
None => ( ) ,
665
670
}
666
671
667
- debug ! ( "processing in state {:?}" , self . state) ;
672
+ h5e_debug ! ( "processing in state {:?}" , self . state) ;
668
673
match self . state {
669
674
//§ data-state
670
675
states:: Data => loop {
@@ -1255,24 +1260,36 @@ impl<'sink, Sink: TokenSink> Tokenizer<'sink, Sink> {
1255
1260
}
1256
1261
1257
1262
if self . opts . profile {
1258
- let mut results: Vec < ( states:: State , u64 ) >
1259
- = self . state_profile . iter ( ) . map ( |( s, t) | ( * s, * t) ) . collect ( ) ;
1260
- results. sort_by ( |& ( _, x) , & ( _, y) | y. cmp ( & x) ) ;
1261
-
1262
- let total = results. iter ( ) . map ( |& ( _, t) | t) . sum ( ) ;
1263
- println ! ( "\n Tokenizer profile, in nanoseconds" ) ;
1264
- println ! ( "\n {:12u} total in token sink" , self . time_in_sink) ;
1265
- println ! ( "\n {:12u} total in tokenizer" , total) ;
1266
-
1267
- for ( k, v) in results. move_iter ( ) {
1268
- let pct = 100.0 * ( v as f64 ) / ( total as f64 ) ;
1269
- println ! ( "{:12u} {:4.1f}% {:?}" , v, pct, k) ;
1270
- }
1263
+ self . dump_profile ( ) ;
1264
+ }
1265
+ }
1266
+
1267
+ #[ cfg( for_c) ]
1268
+ fn dump_profile ( & self ) {
1269
+ unreachable ! ( ) ;
1270
+ }
1271
+
1272
+ #[ cfg( not( for_c) ) ]
1273
+ fn dump_profile ( & self ) {
1274
+ use core:: iter:: AdditiveIterator ;
1275
+
1276
+ let mut results: Vec < ( states:: State , u64 ) >
1277
+ = self . state_profile . iter ( ) . map ( |( s, t) | ( * s, * t) ) . collect ( ) ;
1278
+ results. sort_by ( |& ( _, x) , & ( _, y) | y. cmp ( & x) ) ;
1279
+
1280
+ let total = results. iter ( ) . map ( |& ( _, t) | t) . sum ( ) ;
1281
+ println ! ( "\n Tokenizer profile, in nanoseconds" ) ;
1282
+ println ! ( "\n {:12u} total in token sink" , self . time_in_sink) ;
1283
+ println ! ( "\n {:12u} total in tokenizer" , total) ;
1284
+
1285
+ for ( k, v) in results. move_iter ( ) {
1286
+ let pct = 100.0 * ( v as f64 ) / ( total as f64 ) ;
1287
+ println ! ( "{:12u} {:4.1f}% {:?}" , v, pct, k) ;
1271
1288
}
1272
1289
}
1273
1290
1274
1291
fn eof_step ( & mut self ) -> bool {
1275
- debug ! ( "processing EOF in state {:?}" , self . state) ;
1292
+ h5e_debug ! ( "processing EOF in state {:?}" , self . state) ;
1276
1293
match self . state {
1277
1294
states:: Data | states:: RawData ( Rcdata ) | states:: RawData ( Rawtext )
1278
1295
| states:: RawData ( ScriptData ) | states:: Plaintext
0 commit comments