Skip to content

Commit 8b191d7

Browse files
authored
Implement popcount intrinsic manually (#43)
1 parent 5019cc4 commit 8b191d7

File tree

2 files changed: 74 additions and 52 deletions.

gcc-test-backend/src/main.rs

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,11 @@
1-
#![feature(link_llvm_intrinsics)]
2-
3-
use core::arch::x86_64::__m128i;
4-
5-
// Removed by this commit (gutter lines ending in `-`): a thin wrapper that
// forwards `a` unchanged to the `pmovmskb` extern function and returns its result.
unsafe fn _mm_movemask_epi8(a: i128) -> i32 {
6-
pmovmskb(a)
7-
}
8-
9-
// Removed by this commit (gutter lines ending in `-`): foreign declaration of
// `pmovmskb`, bound through `link_name` to the symbol `llvm.x86.sse2.pmovmskb.128`.
// The `improper_ctypes` lint is silenced on the block; NOTE(review): presumably
// because of the `i128` parameter — confirm.
#[allow(improper_ctypes)]
10-
extern "C" {
11-
#[link_name = "llvm.x86.sse2.pmovmskb.128"]
12-
fn pmovmskb(a: i128) -> i32;
13-
}
14-
151
// Test entry point. In this diff view, gutter lines ending in `-` mark the removed
// body (an unsafe call to `_mm_movemask_epi8(12)`), and gutter lines ending in `+`
// mark the added body, which checks `i64::count_zeros` on three constants.
fn main() {
16-
unsafe {
17-
_mm_movemask_epi8(12);
18-
}
// Added: bit patterns with 3, 2 and 5 bits set respectively.
2+
const A: i64 = 0b0101100;
3+
const B: i64 = 0b0100001;
4+
const C: i64 = 0b1111001;
5+
// Added: the number of zero bits must equal the full width `i64::BITS` minus the
// number of set bits in each constant.
6+
assert_eq!(A.count_zeros(), i64::BITS - 3);
7+
assert_eq!(B.count_zeros(), i64::BITS - 2);
8+
assert_eq!(C.count_zeros(), i64::BITS - 5);
199
}
2010

2111
/*#![feature(core_intrinsics, generators, generator_trait, is_sorted)]

src/intrinsic/mod.rs

Lines changed: 67 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -833,44 +833,76 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
833833
}
834834

835835
// Builds the population count (number of set bits) of `value` and returns it as an rvalue.
//
// Reading this diff view: gutter lines ending in `-` belong to the removed
// implementation, which picked a `__builtin_popcount*` builtin by type, cast the
// operand to the builtin's expected type and emitted a call. Gutter lines ending in
// `+` belong to the added implementation, which computes the count with successive
// mask / shift / add steps on `value` itself, keeping a builtin call only for u128.
fn pop_count(&self, value: RValue<'gcc>) -> RValue<'gcc> {
836-
// FIXME: this seems to generate a call to a function from a library that is not linked by
837-
// core, but linked by std.
836+
// TODO: use the optimized version with fewer operations.
838837
let value_type = value.get_type();
839-
let (popcount, expected_type) =
840-
if value_type.is_uchar(&self.cx) || value_type.is_ushort(&self.cx) || value_type.is_uint(&self.cx) {
841-
// TODO: implement more efficient version for uchar and ushort?
842-
("__builtin_popcount", self.cx.uint_type)
843-
}
844-
else if value_type.is_ulong(&self.cx) {
845-
("__builtin_popcountl", self.cx.ulong_type)
846-
}
847-
else if value_type.is_ulonglong(&self.cx) {
848-
("__builtin_popcountll", self.cx.ulonglong_type)
849-
}
850-
else if value_type.is_u128(&self.cx) {
851-
// TODO: maybe there's a more efficient implementation.
852-
let popcount = self.context.get_builtin_function("__builtin_popcountll");
853-
let sixty_four = self.context.new_rvalue_from_long(value_type, 64);
854-
let high = self.context.new_cast(None, value >> sixty_four, self.cx.ulonglong_type);
855-
let high = self.context.new_call(None, popcount, &[high]);
856-
let low = self.context.new_cast(None, value, self.cx.ulonglong_type);
857-
let low = self.context.new_call(None, popcount, &[low]);
858-
return high + low;
859-
}
860-
else {
861-
unimplemented!("popcount for {:?}", value_type);
862-
};
863838

864-
let popcount = self.context.get_builtin_function(popcount);
// Added: u128 is handled up front. The value is split into a high half
// (`value >> 64`) and a low half, both cast to `ulonglong_type`; each half is
// counted with `__builtin_popcountll` and the two counts are added.
839+
if value_type.is_u128(&self.cx) {
840+
// TODO: implement in the normal algorithm below to have a more efficient
841+
// implementation (that does not require a call to __popcountdi2).
842+
let popcount = self.context.get_builtin_function("__builtin_popcountll");
843+
let sixty_four = self.context.new_rvalue_from_long(value_type, 64);
844+
let high = self.context.new_cast(None, value >> sixty_four, self.cx.ulonglong_type);
845+
let high = self.context.new_call(None, popcount, &[high]);
846+
let low = self.context.new_cast(None, value, self.cx.ulonglong_type);
847+
let low = self.context.new_call(None, popcount, &[low]);
848+
return high + low;
849+
}
865850

866-
let value =
867-
if value_type != expected_type {
868-
self.context.new_cast(None, value, expected_type)
869-
}
870-
else {
871-
value
872-
};
873-
self.context.new_call(None, popcount, &[value])
// Added: every step below has the same shape: `(value & mask) + ((value >> k) & mask)`.
// It adds neighbouring k-bit fields, so each field of width 2k ends up holding the
// number of set bits that the original value had in that field.
// NOTE(review): the masks are written as 64-bit literals for every `value_type`;
// presumably `new_rvalue_from_long` narrows them for smaller types — confirm.
//
// Step 1 (k = 1): 0x5555... keeps every other bit; pairs of bits become 2-bit counts.
851+
// First step.
852+
let mask = self.context.new_rvalue_from_long(value_type, 0x5555555555555555);
853+
let left = value & mask;
854+
let shifted = value >> self.context.new_rvalue_from_int(value_type, 1);
855+
let right = shifted & mask;
856+
let value = left + right;
857+
// Step 2 (k = 2): 0x3333... keeps the low 2 bits of each nibble; 2-bit counts are
// summed into 4-bit counts.
858+
// Second step.
859+
let mask = self.context.new_rvalue_from_long(value_type, 0x3333333333333333);
860+
let left = value & mask;
861+
let shifted = value >> self.context.new_rvalue_from_int(value_type, 2);
862+
let right = shifted & mask;
863+
let value = left + right;
864+
// Step 3 (k = 4): 0x0F0F... keeps the low nibble of each byte; nibble counts are
// summed into per-byte counts.
865+
// Third step.
866+
let mask = self.context.new_rvalue_from_long(value_type, 0x0F0F0F0F0F0F0F0F);
867+
let left = value & mask;
868+
let shifted = value >> self.context.new_rvalue_from_int(value_type, 4);
869+
let right = shifted & mask;
870+
let value = left + right;
871+
// After three steps each byte holds its own bit count, which is the final answer
// for a one-byte value.
872+
if value_type.is_u8(&self.cx) {
873+
return value;
874+
}
875+
// Step 4 (k = 8): per-byte counts are summed into per-16-bit counts.
876+
// Fourth step.
877+
let mask = self.context.new_rvalue_from_long(value_type, 0x00FF00FF00FF00FF);
878+
let left = value & mask;
879+
let shifted = value >> self.context.new_rvalue_from_int(value_type, 8);
880+
let right = shifted & mask;
881+
let value = left + right;
882+
// Four steps are enough for a 16-bit value.
883+
if value_type.is_u16(&self.cx) {
884+
return value;
885+
}
886+
// Step 5 (k = 16): per-16-bit counts are summed into per-32-bit counts.
887+
// Fifth step.
888+
let mask = self.context.new_rvalue_from_long(value_type, 0x0000FFFF0000FFFF);
889+
let left = value & mask;
890+
let shifted = value >> self.context.new_rvalue_from_int(value_type, 16);
891+
let right = shifted & mask;
892+
let value = left + right;
893+
// Five steps are enough for a 32-bit value.
894+
if value_type.is_u32(&self.cx) {
895+
return value;
896+
}
897+
// Step 6 (k = 32): the two 32-bit counts are summed into the final count.
// NOTE(review): any type that is not u8/u16/u32/u128 reaches this point and is
// shifted by 32; this looks like it assumes all remaining types are 64 bits wide —
// confirm against the callers.
898+
// Sixth step.
899+
let mask = self.context.new_rvalue_from_long(value_type, 0x00000000FFFFFFFF);
900+
let left = value & mask;
901+
let shifted = value >> self.context.new_rvalue_from_int(value_type, 32);
902+
let right = shifted & mask;
903+
let value = left + right;
904+
905+
value
874906
}
875907

876908
// Algorithm from: https://blog.regehr.org/archives/1063

0 commit comments

Comments
 (0)