@@ -994,8 +994,26 @@ impl<'a, W: Write> Write for LineWriterShim<'a, W> {
994
994
// the rest as possible). If there were any unwritten newlines, we
995
995
// only buffer out to the last unwritten newline; this helps prevent
996
996
// flushing partial lines on subsequent calls to LineWriterShim::write.
997
- let tail =
998
- if flushed < newline_idx { & buf[ flushed..newline_idx] } else { & buf[ newline_idx..] } ;
997
+
998
+ // Handle the cases in order of most-common to least-common, under
999
+ // the presumption that most writes succeed in totality, and that most
1000
+ // writes are smaller than the buffer.
1001
+ // - Is this a partial line (ie, no newlines left in the unwritten tail)
1002
+ // - If not, does the data out to the last unwritten newline fit in
1003
+ // the buffer?
1004
+ // - If not, scan for the last newline that *does* fit in the buffer
1005
+ let tail = if flushed >= newline_idx {
1006
+ & buf[ flushed..]
1007
+ } else if newline_idx - flushed <= self . buffer . capacity ( ) {
1008
+ & buf[ flushed..newline_idx]
1009
+ } else {
1010
+ let scan_area = & buf[ flushed..] ;
1011
+ let scan_area = & scan_area[ ..self . buffer . capacity ( ) ] ;
1012
+ match memchr:: memrchr ( b'\n' , scan_area) {
1013
+ Some ( newline_idx) => & scan_area[ ..newline_idx + 1 ] ,
1014
+ None => scan_area,
1015
+ }
1016
+ } ;
999
1017
1000
1018
let buffered = self . buffer . write_to_buf ( tail) ;
1001
1019
Ok ( flushed + buffered)
@@ -1809,8 +1827,11 @@ mod tests {
1809
1827
// Flush sets this flag
1810
1828
pub flushed : bool ,
1811
1829
1812
- // If true, writes & flushes will always be an error
1813
- pub always_error : bool ,
1830
+ // If true, writes will always be an error
1831
+ pub always_write_error : bool ,
1832
+
1833
+ // If true, flushes will always be an error
1834
+ pub always_flush_error : bool ,
1814
1835
1815
1836
// If set, only up to this number of bytes will be written in a single
1816
1837
// call to `write`
@@ -1827,8 +1848,8 @@ mod tests {
1827
1848
1828
1849
impl Write for ProgrammableSink {
1829
1850
fn write ( & mut self , data : & [ u8 ] ) -> io:: Result < usize > {
1830
- if self . always_error {
1831
- return Err ( io:: Error :: new ( io:: ErrorKind :: Other , "test - write always_error " ) ) ;
1851
+ if self . always_write_error {
1852
+ return Err ( io:: Error :: new ( io:: ErrorKind :: Other , "test - always_write_error " ) ) ;
1832
1853
}
1833
1854
1834
1855
match self . max_writes {
@@ -1852,8 +1873,8 @@ mod tests {
1852
1873
}
1853
1874
1854
1875
fn flush ( & mut self ) -> io:: Result < ( ) > {
1855
- if self . always_error {
1856
- Err ( io:: Error :: new ( io:: ErrorKind :: Other , "test - flush always_error " ) )
1876
+ if self . always_flush_error {
1877
+ Err ( io:: Error :: new ( io:: ErrorKind :: Other , "test - always_flush_error " ) )
1857
1878
} else {
1858
1879
self . flushed = true ;
1859
1880
Ok ( ( ) )
@@ -2205,4 +2226,60 @@ mod tests {
2205
2226
// An error from write_all leaves everything in an indeterminate state,
2206
2227
// so there's nothing else to test here
2207
2228
}
2229
+
2230
+ /// Under certain circumstances, the old implementation of LineWriter
2231
+ /// would try to buffer "to the last newline" but be forced to buffer
2232
+ /// less than that, leading to inappropriate partial line writes.
2233
+ /// Regression test for that issue.
2234
+ #[ test]
2235
+ fn partial_multiline_buffering ( ) {
2236
+ let writer = ProgrammableSink {
2237
+ // Write only up to 5 bytes at a time
2238
+ accept_prefix : Some ( 5 ) ,
2239
+ ..Default :: default ( )
2240
+ } ;
2241
+
2242
+ let mut writer = LineWriter :: with_capacity ( 10 , writer) ;
2243
+
2244
+ let content = b"AAAAABBBBB\n CCCCDDDDDD\n EEE" ;
2245
+
2246
+ // When content is written, LineWriter will try to write blocks A, B,
2247
+ // C, and D. Only block A will succeed. Under the old behavior, LineWriter
2248
+ // would then try to buffer B, C and D, but because its capacity is 10,
2249
+ // it will only be able to buffer B and C. We don't want it to buffer
2250
+ // partial lines if it can avoid it, so the correct behavior is to
2251
+ // only buffer block B (with its newline).
2252
+ assert_eq ! ( writer. write( content) . unwrap( ) , 11 ) ;
2253
+ assert_eq ! ( writer. get_ref( ) . buffer, * b"AAAAA" ) ;
2254
+
2255
+ writer. flush ( ) . unwrap ( ) ;
2256
+ assert_eq ! ( writer. get_ref( ) . buffer, * b"AAAAABBBBB\n " ) ;
2257
+ }
2258
+
2259
+ /// Same as test_partial_multiline_buffering, but in the event NO full lines
2260
+ /// fit in the buffer, just buffer as much as possible
2261
+ #[ test]
2262
+ fn partial_multiline_buffering_without_full_line ( ) {
2263
+ let writer = ProgrammableSink {
2264
+ // Write only up to 5 bytes at a time
2265
+ accept_prefix : Some ( 5 ) ,
2266
+ ..Default :: default ( )
2267
+ } ;
2268
+
2269
+ let mut writer = LineWriter :: with_capacity ( 5 , writer) ;
2270
+
2271
+ let content = b"AAAAABBBBBBBBBB\n CCCCC\n DDDDD" ;
2272
+
2273
+ // When content is written, LineWriter will try to write blocks A, B,
2274
+ // and C. Only block A will succeed. Under the old behavior, LineWriter
2275
+ // would then try to buffer B and C, but because its capacity is 5,
2276
+ // it will only be able to buffer part of B. Because it's not possible
2277
+ // for it to buffer any complete lines, it should buffer as much of B as
2278
+ // possible
2279
+ assert_eq ! ( writer. write( content) . unwrap( ) , 10 ) ;
2280
+ assert_eq ! ( writer. get_ref( ) . buffer, * b"AAAAA" ) ;
2281
+
2282
+ writer. flush ( ) . unwrap ( ) ;
2283
+ assert_eq ! ( writer. get_ref( ) . buffer, * b"AAAAABBBBB" ) ;
2284
+ }
2208
2285
}
0 commit comments