@@ -1209,21 +1209,28 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
                         throw_ub_custom!(fluent::const_eval_copy_nonoverlapping_overlapping);
                     }
                 }
+            }

-                for i in 0..num_copies {
-                    ptr::copy(
-                        src_bytes,
-                        dest_bytes.add((size * i).bytes_usize()), // `Size` multiplication
-                        size.bytes_usize(),
-                    );
+            let size_in_bytes = size.bytes_usize();
+            // For particularly large arrays (where this is perf-sensitive) it's common that
+            // we're writing a single byte repeatedly. So, optimize that case to a memset.
+            if size_in_bytes == 1 && num_copies >= 1 {
+                // SAFETY: `src_bytes` would be read from anyway by copies below (num_copies >= 1).
+                // Since size_in_bytes = 1, then the `init.no_bytes_init()` check above guarantees
+                // that this read at type `u8` is OK -- it must be an initialized byte.
+                let value = *src_bytes;
+                dest_bytes.write_bytes(value, (size * num_copies).bytes_usize());
+            } else if src_alloc_id == dest_alloc_id {
+                let mut dest_ptr = dest_bytes;
+                for _ in 0..num_copies {
+                    ptr::copy(src_bytes, dest_ptr, size_in_bytes);
+                    dest_ptr = dest_ptr.add(size_in_bytes);
                 }
             } else {
-                for i in 0..num_copies {
-                    ptr::copy_nonoverlapping(
-                        src_bytes,
-                        dest_bytes.add((size * i).bytes_usize()), // `Size` multiplication
-                        size.bytes_usize(),
-                    );
+                let mut dest_ptr = dest_bytes;
+                for _ in 0..num_copies {
+                    ptr::copy_nonoverlapping(src_bytes, dest_ptr, size_in_bytes);
+                    dest_ptr = dest_ptr.add(size_in_bytes);
                 }
             }
         }
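For context beyond the diff: the change replaces per-iteration offset arithmetic (`dest_bytes.add((size * i).bytes_usize())`) with a running destination pointer, and adds a memset fast path for the common case of repeating a single byte. Below is a minimal standalone sketch of the same pattern, not the interpreter code itself; `repeat_copy` and `may_overlap` are hypothetical names introduced for illustration.

```rust
use std::ptr;

/// Repeats the `size`-byte element at `src` into `dest`, `num_copies` times.
///
/// # Safety
/// `src` must be valid for reads of `size` bytes and `dest` for writes of
/// `size * num_copies` bytes; pass `may_overlap = false` only if the two
/// regions are disjoint.
unsafe fn repeat_copy(
    src: *const u8,
    dest: *mut u8,
    size: usize,
    num_copies: usize,
    may_overlap: bool,
) {
    if size == 1 && num_copies >= 1 {
        // Fast path: repeating one byte is a memset. `*src` is read once,
        // before any write, mirroring the SAFETY argument in the diff.
        dest.write_bytes(*src, num_copies);
    } else if may_overlap {
        // Same-allocation case: `ptr::copy` tolerates overlapping ranges.
        let mut dest_ptr = dest;
        for _ in 0..num_copies {
            ptr::copy(src, dest_ptr, size);
            dest_ptr = dest_ptr.add(size);
        }
    } else {
        // Disjoint regions: the stricter `ptr::copy_nonoverlapping` lets the
        // compiler emit a plain memcpy per iteration.
        let mut dest_ptr = dest;
        for _ in 0..num_copies {
            ptr::copy_nonoverlapping(src, dest_ptr, size);
            dest_ptr = dest_ptr.add(size);
        }
    }
}

fn main() {
    let src = [0x2au8];
    let mut dest = vec![0u8; 1024];
    // SAFETY: `src` and `dest` are live, disjoint, and large enough.
    unsafe { repeat_copy(src.as_ptr(), dest.as_mut_ptr(), src.len(), dest.len(), false) };
    assert!(dest.iter().all(|&b| b == 0x2a));
}
```

In the actual interpreter, `may_overlap` corresponds to `src_alloc_id == dest_alloc_id`, and the `throw_ub_custom!` check earlier in the hunk has already rejected genuinely overlapping nonoverlapping-copy requests before either loop runs.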