@@ -4,7 +4,7 @@ mod simd;
4
4
#[ cfg( feature="master" ) ]
5
5
use std:: iter;
6
6
7
- use gccjit:: { ComparisonOp , Function , RValue , ToRValue , Type , UnaryOp , FunctionType } ;
7
+ use gccjit:: { BinaryOp , ComparisonOp , Function , RValue , ToRValue , Type , UnaryOp , FunctionType } ;
8
8
use rustc_codegen_ssa:: MemFlags ;
9
9
use rustc_codegen_ssa:: base:: wants_msvc_seh;
10
10
use rustc_codegen_ssa:: common:: IntPredicate ;
@@ -819,75 +819,43 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
819
819
value
820
820
} ;
821
821
822
- if value_type. is_u128 ( & self . cx ) {
823
- // TODO(antoyo): implement in the normal algorithm below to have a more efficient
824
- // implementation (that does not require a call to __popcountdi2).
825
- let popcount = self . context . get_builtin_function ( "__builtin_popcountll" ) ;
826
- let sixty_four = self . gcc_int ( value_type, 64 ) ;
827
- let right_shift = self . gcc_lshr ( value, sixty_four) ;
828
- let high = self . gcc_int_cast ( right_shift, self . cx . ulonglong_type ) ;
829
- let high = self . context . new_call ( None , popcount, & [ high] ) ;
830
- let low = self . gcc_int_cast ( value, self . cx . ulonglong_type ) ;
831
- let low = self . context . new_call ( None , popcount, & [ low] ) ;
832
- let res = high + low;
833
- return self . gcc_int_cast ( res, result_type) ;
834
- }
835
-
836
- // First step.
837
- let mask = self . context . new_rvalue_from_long ( value_type, 0x5555555555555555 ) ;
838
- let left = value & mask;
839
- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 1 ) ;
840
- let right = shifted & mask;
841
- let value = left + right;
842
-
843
- // Second step.
844
- let mask = self . context . new_rvalue_from_long ( value_type, 0x3333333333333333 ) ;
845
- let left = value & mask;
846
- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 2 ) ;
847
- let right = shifted & mask;
848
- let value = left + right;
849
-
850
- // Third step.
851
- let mask = self . context . new_rvalue_from_long ( value_type, 0x0F0F0F0F0F0F0F0F ) ;
852
- let left = value & mask;
853
- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 4 ) ;
854
- let right = shifted & mask;
855
- let value = left + right;
856
-
857
- if value_type. is_u8 ( & self . cx ) {
858
- return self . context . new_cast ( None , value, result_type) ;
859
- }
860
-
861
- // Fourth step.
862
- let mask = self . context . new_rvalue_from_long ( value_type, 0x00FF00FF00FF00FF ) ;
863
- let left = value & mask;
864
- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 8 ) ;
865
- let right = shifted & mask;
866
- let value = left + right;
867
-
868
- if value_type. is_u16 ( & self . cx ) {
869
- return self . context . new_cast ( None , value, result_type) ;
870
- }
871
-
872
- // Fifth step.
873
- let mask = self . context . new_rvalue_from_long ( value_type, 0x0000FFFF0000FFFF ) ;
874
- let left = value & mask;
875
- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 16 ) ;
876
- let right = shifted & mask;
877
- let value = left + right;
878
-
879
- if value_type. is_u32 ( & self . cx ) {
880
- return self . context . new_cast ( None , value, result_type) ;
881
- }
882
-
883
- // Sixth step.
884
- let mask = self . context . new_rvalue_from_long ( value_type, 0x00000000FFFFFFFF ) ;
885
- let left = value & mask;
886
- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 32 ) ;
887
- let right = shifted & mask;
888
- let value = left + right;
889
-
890
- self . context . new_cast ( None , value, result_type)
822
+ // Use Wegner's algorithm for population count; gcc seems to optimize it better
823
+ // for (int counter = 0; value != 0; counter++) {
824
+ // value &= value - 1;
825
+ // }
826
+ let func = self . current_func . borrow ( ) . expect ( "func" ) ;
827
+ let loop_head = func. new_block ( "head" ) ;
828
+ let loop_body = func. new_block ( "body" ) ;
829
+ let loop_tail = func. new_block ( "tail" ) ;
830
+
831
+ // gcc seems to optimize better if we use an int here
832
+ let counter_type = self . cx . int_type ;
833
+ let counter = self . current_func ( ) . new_local ( None , counter_type, "popcount_counter" ) ;
834
+ let val = self . current_func ( ) . new_local ( None , value_type, "popcount_value" ) ;
835
+ let zero = self . cx . gcc_zero ( counter_type) ;
836
+ self . llbb ( ) . add_assignment ( None , counter, zero) ;
837
+ self . llbb ( ) . add_assignment ( None , val, value) ;
838
+ self . br ( loop_head) ;
839
+
840
+ // check if value isn't zero
841
+ self . switch_to_block ( loop_head) ;
842
+ let zero = self . cx . gcc_zero ( value_type) ;
843
+ let cond = self . gcc_icmp ( IntPredicate :: IntNE , val. to_rvalue ( ) , zero) ;
844
+ self . cond_br ( cond, loop_body, loop_tail) ;
845
+
846
+ // val &= val - 1;
847
+ self . switch_to_block ( loop_body) ;
848
+ let sub = val. to_rvalue ( ) - self . context . new_rvalue_one ( value_type) ;
849
+ loop_body. add_assignment_op ( None , val, BinaryOp :: BitwiseAnd , sub) ;
850
+
851
+ // counter += 1
852
+ let one = self . context . new_rvalue_one ( counter_type) ;
853
+ loop_body. add_assignment_op ( None , counter, BinaryOp :: Plus , one) ;
854
+ self . br ( loop_head) ;
855
+
856
+ // end of loop
857
+ self . switch_to_block ( loop_tail) ;
858
+ self . gcc_int_cast ( counter. to_rvalue ( ) , result_type)
891
859
}
892
860
893
861
// Algorithm from: https://blog.regehr.org/archives/1063
0 commit comments