@@ -22,8 +22,11 @@ const BUF_SIZE: usize = 8192;
22
22
/// size of the buffer, rather than the full length of the encoded data, and
23
23
/// because it doesn't need to reallocate memory along the way.
24
24
pub struct FileEncoder {
25
- /// The input buffer. For adequate performance, we need to be able to write
26
- /// directly to the unwritten region of the buffer, without calling copy_from_slice.
25
+ // The input buffer. For adequate performance, we need to be able to write
26
+ // directly to the unwritten region of the buffer, without calling copy_from_slice.
27
+ // Note that our buffer is always initialized so that we can do that direct access
28
+ // without unsafe code. Users of this type write many more than BUF_SIZE bytes, so the
29
+ // initialization is approximately free.
27
30
buf : Box < [ u8 ; BUF_SIZE ] > ,
28
31
buffered : usize ,
29
32
flushed : usize ,
@@ -54,13 +57,12 @@ impl FileEncoder {
54
57
55
58
#[ cold]
56
59
#[ inline( never) ]
57
- pub fn flush ( & mut self ) -> & mut [ u8 ; BUF_SIZE ] {
60
+ pub fn flush ( & mut self ) {
58
61
if self . res . is_ok ( ) {
59
62
self . res = self . file . write_all ( & self . buf [ ..self . buffered ] ) ;
60
63
}
61
64
self . flushed += self . buffered ;
62
65
self . buffered = 0 ;
63
- & mut self . buf
64
66
}
65
67
66
68
pub fn file ( & self ) -> & File {
@@ -76,7 +78,8 @@ impl FileEncoder {
76
78
#[ cold]
77
79
#[ inline( never) ]
78
80
fn write_all_cold_path ( & mut self , buf : & [ u8 ] ) {
79
- if let Some ( dest) = self . flush ( ) . get_mut ( ..buf. len ( ) ) {
81
+ self . flush ( ) ;
82
+ if let Some ( dest) = self . buf . get_mut ( ..buf. len ( ) ) {
80
83
dest. copy_from_slice ( buf) ;
81
84
self . buffered += buf. len ( ) ;
82
85
} else {
@@ -99,13 +102,20 @@ impl FileEncoder {
99
102
100
103
/// Write up to `N` bytes to this encoder.
101
104
///
102
- /// Whenever possible, use this function to do writes whose length has a small and
103
- /// compile-time constant upper bound.
105
+ /// This function can be used to avoid the overhead of calling memcpy for writes that
106
+ /// have runtime-variable length, but are small and have a small fixed upper bound.
107
+ ///
108
+ /// This can be used to do in-place encoding as is done for leb128 (without this function
109
+ /// we would need to write to a temporary buffer then memcpy into the encoder), and it can
110
+ /// also be used to implement the varint scheme we use for rmeta and dep graph encoding,
111
+ /// where we only want to encode the first few bytes of an integer. Copying in the whole
112
+ /// integer then only advancing the encoder state for the few bytes we care about is more
113
+ /// efficient than calling [`FileEncoder::write_all`], because variable-size copies are
114
+ /// always lowered to `memcpy`, which has overhead and contains a lot of logic we can bypass
115
+ /// with this function. Note that common architectures support fixed-size writes up to 8 bytes
116
+ /// with one instruction, so while this does in some sense do wasted work, we come out ahead.
104
117
#[ inline]
105
- pub fn write_with < const N : usize , V > ( & mut self , mut visitor : V )
106
- where
107
- V : FnMut ( & mut [ u8 ; N ] ) -> usize ,
108
- {
118
+ pub fn write_with < const N : usize > ( & mut self , visitor : impl FnOnce ( & mut [ u8 ; N ] ) -> usize ) {
109
119
let flush_threshold = const { BUF_SIZE . checked_sub ( N ) . unwrap ( ) } ;
110
120
if std:: intrinsics:: unlikely ( self . buffered > flush_threshold) {
111
121
self . flush ( ) ;
@@ -115,26 +125,50 @@ impl FileEncoder {
115
125
// We produce a post-mono error if N > BUF_SIZE.
116
126
let buf = unsafe { self . buffer_empty ( ) . first_chunk_mut :: < N > ( ) . unwrap_unchecked ( ) } ;
117
127
let written = visitor ( buf) ;
118
- debug_assert ! ( written <= N ) ;
119
128
// We have to ensure that an errant visitor cannot cause self.buffered to exceed BUF_SIZE.
120
- self . buffered += written. min ( N ) ;
129
+ if written > N {
130
+ Self :: panic_invalid_write :: < N > ( written) ;
131
+ }
132
+ self . buffered += written;
133
+ }
134
+
135
+ #[ cold]
136
+ #[ inline( never) ]
137
+ fn panic_invalid_write < const N : usize > ( written : usize ) {
138
+ panic ! ( "FileEncoder::write_with::<{N}> cannot be used to write {written} bytes" ) ;
139
+ }
140
+
141
+ /// Helper for calls where [`FileEncoder::write_with`] always writes the whole array.
142
+ #[ inline]
143
+ pub fn write_array < const N : usize > ( & mut self , buf : [ u8 ; N ] ) {
144
+ self . write_with ( |dest| {
145
+ * dest = buf;
146
+ N
147
+ } )
121
148
}
122
149
123
150
pub fn finish ( mut self ) -> Result < usize , io:: Error > {
124
151
self . flush ( ) ;
125
- match self . res {
152
+ match std :: mem :: replace ( & mut self . res , Ok ( ( ) ) ) {
126
153
Ok ( ( ) ) => Ok ( self . position ( ) ) ,
127
154
Err ( e) => Err ( e) ,
128
155
}
129
156
}
130
157
}
131
158
159
+ impl Drop for FileEncoder {
160
+ fn drop ( & mut self ) {
161
+ // Likely to be a no-op, because `finish` should have been called and
162
+ // it also flushes. But do it just in case.
163
+ self . flush ( ) ;
164
+ }
165
+ }
166
+
132
167
macro_rules! write_leb128 {
133
168
( $this_fn: ident, $int_ty: ty, $write_leb_fn: ident) => {
134
169
#[ inline]
135
170
fn $this_fn( & mut self , v: $int_ty) {
136
- const MAX_ENCODED_LEN : usize = $crate:: leb128:: max_leb128_len:: <$int_ty>( ) ;
137
- self . write_with:: <MAX_ENCODED_LEN , _>( |buf| leb128:: $write_leb_fn( buf, v) )
171
+ self . write_with( |buf| leb128:: $write_leb_fn( buf, v) )
138
172
}
139
173
} ;
140
174
}
@@ -147,18 +181,12 @@ impl Encoder for FileEncoder {
147
181
148
182
#[ inline]
149
183
fn emit_u16 ( & mut self , v : u16 ) {
150
- self . write_with ( |buf| {
151
- * buf = v. to_le_bytes ( ) ;
152
- 2
153
- } ) ;
184
+ self . write_array ( v. to_le_bytes ( ) ) ;
154
185
}
155
186
156
187
#[ inline]
157
188
fn emit_u8 ( & mut self , v : u8 ) {
158
- self . write_with ( |buf : & mut [ u8 ; 1 ] | {
159
- buf[ 0 ] = v;
160
- 1
161
- } ) ;
189
+ self . write_array ( [ v] ) ;
162
190
}
163
191
164
192
write_leb128 ! ( emit_isize, isize , write_isize_leb128) ;
@@ -168,10 +196,7 @@ impl Encoder for FileEncoder {
168
196
169
197
#[ inline]
170
198
fn emit_i16 ( & mut self , v : i16 ) {
171
- self . write_with ( |buf| {
172
- * buf = v. to_le_bytes ( ) ;
173
- 2
174
- } ) ;
199
+ self . write_array ( v. to_le_bytes ( ) ) ;
175
200
}
176
201
177
202
#[ inline]
@@ -370,10 +395,7 @@ impl Encodable<FileEncoder> for IntEncodedWithFixedSize {
370
395
#[ inline]
371
396
fn encode ( & self , e : & mut FileEncoder ) {
372
397
let _start_pos = e. position ( ) ;
373
- e. write_with ( |buf| {
374
- * buf = self . 0 . to_le_bytes ( ) ;
375
- buf. len ( )
376
- } ) ;
398
+ e. write_array ( self . 0 . to_le_bytes ( ) ) ;
377
399
let _end_pos = e. position ( ) ;
378
400
debug_assert_eq ! ( ( _end_pos - _start_pos) , IntEncodedWithFixedSize :: ENCODED_SIZE ) ;
379
401
}
0 commit comments