@@ -20,7 +20,7 @@ use syntax::ast::Mutability;
20
20
use super :: {
21
21
Pointer , AllocId , Allocation , GlobalId , AllocationExtra ,
22
22
EvalResult , Scalar , EvalErrorKind , AllocKind , PointerArithmetic ,
23
- Machine , AllocMap , MayLeak , ErrorHandled , InboundsCheck , UndefMask ,
23
+ Machine , AllocMap , MayLeak , ErrorHandled , InboundsCheck ,
24
24
} ;
25
25
26
26
#[ derive( Debug , PartialEq , Eq , Copy , Clone , Hash ) ]
@@ -789,38 +789,58 @@ impl<'a, 'mir, 'tcx, M: Machine<'a, 'mir, 'tcx>> Memory<'a, 'mir, 'tcx, M> {
789
789
// The bits have to be saved locally before writing to dest in case src and dest overlap.
790
790
assert_eq ! ( size. bytes( ) as usize as u64 , size. bytes( ) ) ;
791
791
792
- let undef_mask = self . get ( src. alloc_id ) ?. undef_mask . clone ( ) ;
793
- let get = |i| undef_mask. get ( src. offset + Size :: from_bytes ( i) ) ;
794
- let dest_allocation = self . get_mut ( dest. alloc_id ) ?;
792
+ let undef_mask = & self . get ( src. alloc_id ) ?. undef_mask ;
793
+
794
+ // a precomputed cache of run lengths for the ranges of defined/undefined bits
795
+ // 0000010010001110 will become
796
+ // [5, 1, 2, 1, 3, 3, 1]
797
+ // where each element is the length of one run and the state toggles after every element
798
+ let mut ranges = smallvec:: SmallVec :: < [ u64 ; 1 ] > :: new ( ) ;
799
+ let first = undef_mask. get ( src. offset ) ;
800
+ let mut cur_len = 1 ;
801
+ let mut cur = first;
802
+ for i in 1 ..size. bytes ( ) {
803
+ // FIXME: optimize to bitshift the current undef block's bits and read the top bit
804
+ if undef_mask. get ( src. offset + Size :: from_bytes ( i) ) == cur {
805
+ cur_len += 1 ;
806
+ } else {
807
+ ranges. push ( cur_len) ;
808
+ cur_len = 1 ;
809
+ cur = !cur;
810
+ }
811
+ }
795
812
813
+ // now fill in all the data
814
+ let dest_allocation = self . get_mut ( dest. alloc_id ) ?;
796
815
// an optimization where we can just overwrite an entire range of definedness bits if
797
816
// they are going to be uniformly `1` or `0`.
798
- if size. bytes ( ) * repeat > UndefMask :: BLOCK_SIZE {
799
- let first = undef_mask. get ( src. offset ) ;
800
- // check that all bits are the same as the first bit
801
- // FIXME(oli-obk): consider making this a function on `UndefMask` and optimize it, too
802
- if ( 1 ..size. bytes ( ) ) . all ( |i| get ( i) == first) {
803
- dest_allocation. undef_mask . set_range (
804
- dest. offset ,
805
- dest. offset + size * repeat,
806
- first,
807
- ) ;
808
- return Ok ( ( ) )
809
- }
817
+ if ranges. is_empty ( ) {
818
+ dest_allocation. undef_mask . set_range (
819
+ dest. offset ,
820
+ dest. offset + size * repeat,
821
+ first,
822
+ ) ;
823
+ return Ok ( ( ) )
810
824
}
811
825
812
- // the default path
813
- for i in 0 ..size. bytes ( ) {
814
- let defined = get ( i) ;
815
-
816
- for j in 0 ..repeat {
817
- dest_allocation. undef_mask . set (
818
- dest. offset + Size :: from_bytes ( i + ( size. bytes ( ) * j) ) ,
819
- defined
826
+ // remember to push the length of the final run, which the loop above never flushes
827
+ ranges. push ( cur_len) ;
828
+
829
+ for mut j in 0 ..repeat {
830
+ j *= size. bytes ( ) ;
831
+ j += dest. offset . bytes ( ) ;
832
+ let mut cur = first;
833
+ for range in & ranges {
834
+ let old_j = j;
835
+ j += range;
836
+ dest_allocation. undef_mask . set_range_inbounds (
837
+ Size :: from_bytes ( old_j) ,
838
+ Size :: from_bytes ( j) ,
839
+ cur,
820
840
) ;
841
+ cur = !cur;
821
842
}
822
843
}
823
-
824
844
Ok ( ( ) )
825
845
}
826
846
}
0 commit comments