@@ -4,7 +4,7 @@ mod simd;
4
4
#[ cfg( feature="master" ) ]
5
5
use std:: iter;
6
6
7
- use gccjit:: { ComparisonOp , Function , RValue , ToRValue , Type , UnaryOp , FunctionType } ;
7
+ use gccjit:: { BinaryOp , ComparisonOp , Function , RValue , ToRValue , Type , UnaryOp , FunctionType } ;
8
8
use rustc_codegen_ssa:: MemFlags ;
9
9
use rustc_codegen_ssa:: base:: wants_msvc_seh;
10
10
use rustc_codegen_ssa:: common:: IntPredicate ;
@@ -820,74 +820,52 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
820
820
} ;
821
821
822
822
if value_type. is_u128 ( & self . cx ) {
823
- // TODO(antoyo): implement in the normal algorithm below to have a more efficient
824
- // implementation (that does not require a call to __popcountdi2).
825
- let popcount = self . context . get_builtin_function ( "__builtin_popcountll" ) ;
826
823
let sixty_four = self . gcc_int ( value_type, 64 ) ;
827
824
let right_shift = self . gcc_lshr ( value, sixty_four) ;
828
825
let high = self . gcc_int_cast ( right_shift, self . cx . ulonglong_type ) ;
829
- let high = self . context . new_call ( None , popcount , & [ high] ) ;
826
+ let high = self . pop_count ( high) ;
830
827
let low = self . gcc_int_cast ( value, self . cx . ulonglong_type ) ;
831
- let low = self . context . new_call ( None , popcount , & [ low] ) ;
828
+ let low = self . pop_count ( low) ;
832
829
let res = high + low;
833
830
return self . gcc_int_cast ( res, result_type) ;
834
831
}
835
832
836
- // First step.
837
- let mask = self . context . new_rvalue_from_long ( value_type, 0x5555555555555555 ) ;
838
- let left = value & mask;
839
- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 1 ) ;
840
- let right = shifted & mask;
841
- let value = left + right;
842
-
843
- // Second step.
844
- let mask = self . context . new_rvalue_from_long ( value_type, 0x3333333333333333 ) ;
845
- let left = value & mask;
846
- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 2 ) ;
847
- let right = shifted & mask;
848
- let value = left + right;
849
-
850
- // Third step.
851
- let mask = self . context . new_rvalue_from_long ( value_type, 0x0F0F0F0F0F0F0F0F ) ;
852
- let left = value & mask;
853
- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 4 ) ;
854
- let right = shifted & mask;
855
- let value = left + right;
856
-
857
- if value_type. is_u8 ( & self . cx ) {
858
- return self . context . new_cast ( None , value, result_type) ;
859
- }
860
-
861
- // Fourth step.
862
- let mask = self . context . new_rvalue_from_long ( value_type, 0x00FF00FF00FF00FF ) ;
863
- let left = value & mask;
864
- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 8 ) ;
865
- let right = shifted & mask;
866
- let value = left + right;
867
-
868
- if value_type. is_u16 ( & self . cx ) {
869
- return self . context . new_cast ( None , value, result_type) ;
870
- }
871
-
872
- // Fifth step.
873
- let mask = self . context . new_rvalue_from_long ( value_type, 0x0000FFFF0000FFFF ) ;
874
- let left = value & mask;
875
- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 16 ) ;
876
- let right = shifted & mask;
877
- let value = left + right;
878
-
879
- if value_type. is_u32 ( & self . cx ) {
880
- return self . context . new_cast ( None , value, result_type) ;
881
- }
882
-
883
- // Sixth step.
884
- let mask = self . context . new_rvalue_from_long ( value_type, 0x00000000FFFFFFFF ) ;
885
- let left = value & mask;
886
- let shifted = value >> self . context . new_rvalue_from_int ( value_type, 32 ) ;
887
- let right = shifted & mask;
888
- let value = left + right;
889
-
890
- self . context . new_cast ( None , value, result_type)
833
+ // Use Wegner's algorithm for population count; GCC seems to handle it better:
834
+ // for (int counter = 0; value != 0; counter++) {
835
+ // value &= value - 1;
836
+ // }
837
+ let func = self . current_func . borrow ( ) . expect ( "func" ) ;
838
+ let loop_head = func. new_block ( "head" ) ;
839
+ let loop_body = func. new_block ( "body" ) ;
840
+ let loop_tail = func. new_block ( "tail" ) ;
841
+
842
+ let counter_type = self . type_u32 ( ) ;
843
+ let counter = self . current_func ( ) . new_local ( None , counter_type, "popcount_counter" ) ;
844
+ let val = self . current_func ( ) . new_local ( None , value_type, "popcount_value" ) ;
845
+ let zero = self . cx . gcc_zero ( counter_type) ;
846
+ self . llbb ( ) . add_assignment ( None , counter, zero) ;
847
+ self . llbb ( ) . add_assignment ( None , val, value) ;
848
+ self . br ( loop_head) ;
849
+
850
+ // check if value isn't zero
851
+ self . switch_to_block ( loop_head) ;
852
+ let zero = self . cx . gcc_zero ( value_type) ;
853
+ let cond = self . gcc_icmp ( IntPredicate :: IntNE , val. to_rvalue ( ) , zero) ;
854
+ self . cond_br ( cond, loop_body, loop_tail) ;
855
+
856
+ // val &= val - 1;
857
+ self . switch_to_block ( loop_body) ;
858
+ let sub = val. to_rvalue ( ) - self . context . new_rvalue_one ( value_type) ;
859
+ loop_body. add_assignment_op ( None , val, BinaryOp :: BitwiseAnd , sub) ;
860
+
861
+ // counter += 1
862
+ let one = self . context . new_rvalue_one ( counter_type) ;
863
+ loop_body. add_assignment_op ( None , counter, BinaryOp :: Plus , one) ;
864
+ self . br ( loop_head) ;
865
+
866
+ // end of loop
867
+ self . switch_to_block ( loop_tail) ;
868
+ self . gcc_int_cast ( counter. to_rvalue ( ) , result_type)
891
869
}
892
870
893
871
// Algorithm from: https://blog.regehr.org/archives/1063
0 commit comments