Skip to content

Commit fabdc1a

Browse files
authored
Merge pull request #348 from sadlerap/optimize-popcount
optimize popcount implementation
2 parents c80fb4a + 64abf58 commit fabdc1a

File tree

1 file changed

+39
-61
lines changed

1 file changed

+39
-61
lines changed

src/intrinsic/mod.rs

+39-61
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ mod simd;
44
#[cfg(feature="master")]
55
use std::iter;
66

7-
use gccjit::{ComparisonOp, Function, RValue, ToRValue, Type, UnaryOp, FunctionType};
7+
use gccjit::{BinaryOp, ComparisonOp, Function, RValue, ToRValue, Type, UnaryOp, FunctionType};
88
use rustc_codegen_ssa::MemFlags;
99
use rustc_codegen_ssa::base::wants_msvc_seh;
1010
use rustc_codegen_ssa::common::IntPredicate;
@@ -820,74 +820,52 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
820820
};
821821

822822
if value_type.is_u128(&self.cx) {
823-
// TODO(antoyo): implement in the normal algorithm below to have a more efficient
824-
// implementation (that does not require a call to __popcountdi2).
825-
let popcount = self.context.get_builtin_function("__builtin_popcountll");
826823
let sixty_four = self.gcc_int(value_type, 64);
827824
let right_shift = self.gcc_lshr(value, sixty_four);
828825
let high = self.gcc_int_cast(right_shift, self.cx.ulonglong_type);
829-
let high = self.context.new_call(None, popcount, &[high]);
826+
let high = self.pop_count(high);
830827
let low = self.gcc_int_cast(value, self.cx.ulonglong_type);
831-
let low = self.context.new_call(None, popcount, &[low]);
828+
let low = self.pop_count(low);
832829
let res = high + low;
833830
return self.gcc_int_cast(res, result_type);
834831
}
835832

836-
// First step.
837-
let mask = self.context.new_rvalue_from_long(value_type, 0x5555555555555555);
838-
let left = value & mask;
839-
let shifted = value >> self.context.new_rvalue_from_int(value_type, 1);
840-
let right = shifted & mask;
841-
let value = left + right;
842-
843-
// Second step.
844-
let mask = self.context.new_rvalue_from_long(value_type, 0x3333333333333333);
845-
let left = value & mask;
846-
let shifted = value >> self.context.new_rvalue_from_int(value_type, 2);
847-
let right = shifted & mask;
848-
let value = left + right;
849-
850-
// Third step.
851-
let mask = self.context.new_rvalue_from_long(value_type, 0x0F0F0F0F0F0F0F0F);
852-
let left = value & mask;
853-
let shifted = value >> self.context.new_rvalue_from_int(value_type, 4);
854-
let right = shifted & mask;
855-
let value = left + right;
856-
857-
if value_type.is_u8(&self.cx) {
858-
return self.context.new_cast(None, value, result_type);
859-
}
860-
861-
// Fourth step.
862-
let mask = self.context.new_rvalue_from_long(value_type, 0x00FF00FF00FF00FF);
863-
let left = value & mask;
864-
let shifted = value >> self.context.new_rvalue_from_int(value_type, 8);
865-
let right = shifted & mask;
866-
let value = left + right;
867-
868-
if value_type.is_u16(&self.cx) {
869-
return self.context.new_cast(None, value, result_type);
870-
}
871-
872-
// Fifth step.
873-
let mask = self.context.new_rvalue_from_long(value_type, 0x0000FFFF0000FFFF);
874-
let left = value & mask;
875-
let shifted = value >> self.context.new_rvalue_from_int(value_type, 16);
876-
let right = shifted & mask;
877-
let value = left + right;
878-
879-
if value_type.is_u32(&self.cx) {
880-
return self.context.new_cast(None, value, result_type);
881-
}
882-
883-
// Sixth step.
884-
let mask = self.context.new_rvalue_from_long(value_type, 0x00000000FFFFFFFF);
885-
let left = value & mask;
886-
let shifted = value >> self.context.new_rvalue_from_int(value_type, 32);
887-
let right = shifted & mask;
888-
let value = left + right;
889-
890-
self.context.new_cast(None, value, result_type)
833+
// Use Wegner's algorithm for population count; gcc seems to play better with it
834+
// for (int counter = 0; value != 0; counter++) {
835+
// value &= value - 1;
836+
// }
837+
let func = self.current_func.borrow().expect("func");
838+
let loop_head = func.new_block("head");
839+
let loop_body = func.new_block("body");
840+
let loop_tail = func.new_block("tail");
841+
842+
let counter_type = self.int_type;
843+
let counter = self.current_func().new_local(None, counter_type, "popcount_counter");
844+
let val = self.current_func().new_local(None, value_type, "popcount_value");
845+
let zero = self.context.new_rvalue_zero(counter_type);
846+
self.llbb().add_assignment(None, counter, zero);
847+
self.llbb().add_assignment(None, val, value);
848+
self.br(loop_head);
849+
850+
// check if value isn't zero
851+
self.switch_to_block(loop_head);
852+
let zero = self.context.new_rvalue_zero(value_type);
853+
let cond = self.context.new_comparison(None, ComparisonOp::NotEquals, val.to_rvalue(), zero);
854+
self.cond_br(cond, loop_body, loop_tail);
855+
856+
// val &= val - 1;
857+
self.switch_to_block(loop_body);
858+
let sub = val.to_rvalue() - self.context.new_rvalue_one(value_type);
859+
loop_body.add_assignment_op(None, val, BinaryOp::BitwiseAnd, sub);
860+
861+
// counter += 1
862+
let one = self.context.new_rvalue_one(counter_type);
863+
loop_body.add_assignment_op(None, counter, BinaryOp::Plus, one);
864+
self.br(loop_head);
865+
866+
// end of loop
867+
self.switch_to_block(loop_tail);
868+
self.context.new_cast(None, counter.to_rvalue(), result_type)
891869
}
892870

893871
// Algorithm from: https://blog.regehr.org/archives/1063

0 commit comments

Comments
 (0)