@@ -833,44 +833,76 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
833
833
}
834
834
835
835
fn pop_count ( & self , value : RValue < ' gcc > ) -> RValue < ' gcc > {
836
- // FIXME: this seems to generate a call to a function from a library that is not linked by
837
- // core, but linked by std.
836
+ // TODO: use the optimized version with fewer operations.
838
837
let value_type = value. get_type ( ) ;
839
- let ( popcount, expected_type) =
840
- if value_type. is_uchar ( & self . cx ) || value_type. is_ushort ( & self . cx ) || value_type. is_uint ( & self . cx ) {
841
- // TODO: implement more efficient version for uchar and ushort?
842
- ( "__builtin_popcount" , self . cx . uint_type )
843
- }
844
- else if value_type. is_ulong ( & self . cx ) {
845
- ( "__builtin_popcountl" , self . cx . ulong_type )
846
- }
847
- else if value_type. is_ulonglong ( & self . cx ) {
848
- ( "__builtin_popcountll" , self . cx . ulonglong_type )
849
- }
850
- else if value_type. is_u128 ( & self . cx ) {
851
- // TODO: maybe there's a more efficient implementation.
852
- let popcount = self . context . get_builtin_function ( "__builtin_popcountll" ) ;
853
- let sixty_four = self . context . new_rvalue_from_long ( value_type, 64 ) ;
854
- let high = self . context . new_cast ( None , value >> sixty_four, self . cx . ulonglong_type ) ;
855
- let high = self . context . new_call ( None , popcount, & [ high] ) ;
856
- let low = self . context . new_cast ( None , value, self . cx . ulonglong_type ) ;
857
- let low = self . context . new_call ( None , popcount, & [ low] ) ;
858
- return high + low;
859
- }
860
- else {
861
- unimplemented ! ( "popcount for {:?}" , value_type) ;
862
- } ;
863
838
864
- let popcount = self . context . get_builtin_function ( popcount) ;
839
+ if value_type. is_u128 ( & self . cx ) {
840
+ // TODO: implement in the normal algorithm below to have a more efficient
841
+ // implementation (that does not require a call to __popcountdi2).
842
+ let popcount = self . context . get_builtin_function ( "__builtin_popcountll" ) ;
843
+ let sixty_four = self . context . new_rvalue_from_long ( value_type, 64 ) ;
844
+ let high = self . context . new_cast ( None , value >> sixty_four, self . cx . ulonglong_type ) ;
845
+ let high = self . context . new_call ( None , popcount, & [ high] ) ;
846
+ let low = self . context . new_cast ( None , value, self . cx . ulonglong_type ) ;
847
+ let low = self . context . new_call ( None , popcount, & [ low] ) ;
848
+ return high + low;
849
+ }
865
850
866
- let value =
867
- if value_type != expected_type {
868
- self . context . new_cast ( None , value, expected_type)
869
- }
870
- else {
871
- value
872
- } ;
873
- self . context . new_call ( None , popcount, & [ value] )
851
+ // First step.
852
+ let mask = self . context . new_rvalue_from_long ( value_type, 0x5555555555555555 ) ;
853
+ let left = value & mask;
854
+ let shifted = value >> self . context . new_rvalue_from_int ( value_type, 1 ) ;
855
+ let right = shifted & mask;
856
+ let value = left + right;
857
+
858
+ // Second step.
859
+ let mask = self . context . new_rvalue_from_long ( value_type, 0x3333333333333333 ) ;
860
+ let left = value & mask;
861
+ let shifted = value >> self . context . new_rvalue_from_int ( value_type, 2 ) ;
862
+ let right = shifted & mask;
863
+ let value = left + right;
864
+
865
+ // Third step.
866
+ let mask = self . context . new_rvalue_from_long ( value_type, 0x0F0F0F0F0F0F0F0F ) ;
867
+ let left = value & mask;
868
+ let shifted = value >> self . context . new_rvalue_from_int ( value_type, 4 ) ;
869
+ let right = shifted & mask;
870
+ let value = left + right;
871
+
872
+ if value_type. is_u8 ( & self . cx ) {
873
+ return value;
874
+ }
875
+
876
+ // Fourth step.
877
+ let mask = self . context . new_rvalue_from_long ( value_type, 0x00FF00FF00FF00FF ) ;
878
+ let left = value & mask;
879
+ let shifted = value >> self . context . new_rvalue_from_int ( value_type, 8 ) ;
880
+ let right = shifted & mask;
881
+ let value = left + right;
882
+
883
+ if value_type. is_u16 ( & self . cx ) {
884
+ return value;
885
+ }
886
+
887
+ // Fifth step.
888
+ let mask = self . context . new_rvalue_from_long ( value_type, 0x0000FFFF0000FFFF ) ;
889
+ let left = value & mask;
890
+ let shifted = value >> self . context . new_rvalue_from_int ( value_type, 16 ) ;
891
+ let right = shifted & mask;
892
+ let value = left + right;
893
+
894
+ if value_type. is_u32 ( & self . cx ) {
895
+ return value;
896
+ }
897
+
898
+ // Sixth step.
899
+ let mask = self . context . new_rvalue_from_long ( value_type, 0x00000000FFFFFFFF ) ;
900
+ let left = value & mask;
901
+ let shifted = value >> self . context . new_rvalue_from_int ( value_type, 32 ) ;
902
+ let right = shifted & mask;
903
+ let value = left + right;
904
+
905
+ value
874
906
}
875
907
876
908
// Algorithm from: https://blog.regehr.org/archives/1063
0 commit comments