Skip to content

Commit 6f2db1e

Browse files
committed
Implement popcount intrinsic manually
1 parent 7de70f4 commit 6f2db1e

File tree

1 file changed

+67
-35
lines changed

1 file changed

+67
-35
lines changed

src/intrinsic/mod.rs

Lines changed: 67 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -833,44 +833,76 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
833833
}
834834

835835
/// Emits code computing the number of set bits in `value` without calling a popcount builtin
/// (except for 128-bit values, see below).
///
/// The count is built with the classic parallel bit-summing scheme: at each step, adjacent
/// groups of bits are added pairwise, doubling the group width (1, 2, 4, 8, 16, 32 bits), until
/// a single group spans the whole integer. The result has the same type as `value`.
///
/// NOTE(review): this relies on `>>` being a logical shift, i.e. it presumably expects `value`
/// to have an unsigned type — confirm against the callers.
fn pop_count(&self, value: RValue<'gcc>) -> RValue<'gcc> {
    // TODO: use the optimized version with fewer operations.
    let value_type = value.get_type();

    if value_type.is_u128(&self.cx) {
        // TODO: implement in the normal algorithm below to have a more efficient
        // implementation (that does not require a call to __popcountdi2).
        let popcount = self.context.get_builtin_function("__builtin_popcountll");
        let sixty_four = self.context.new_rvalue_from_long(value_type, 64);
        let high = self.context.new_cast(None, value >> sixty_four, self.cx.ulonglong_type);
        let high = self.context.new_call(None, popcount, &[high]);
        let low = self.context.new_cast(None, value, self.cx.ulonglong_type);
        let low = self.context.new_call(None, popcount, &[low]);
        return high + low;
    }

    // (mask, shift) for each summing step. The masks are written at full 64-bit width so that
    // the upper half of a 64-bit value is not discarded: with 32-bit masks such as 0x55555555,
    // the very first `&` would clear bits 32..63 and their set bits would never be counted.
    // For narrower types only the low bits of each mask matter (this assumes the constant is
    // truncated to `value_type`, which the 8-bit and 16-bit cases already depended on).
    let steps = [
        (0x5555_5555_5555_5555, 1),
        (0x3333_3333_3333_3333, 2),
        (0x0F0F_0F0F_0F0F_0F0F, 4),
        (0x00FF_00FF_00FF_00FF, 8),
        (0x0000_FFFF_0000_FFFF, 16),
        (0x0000_0000_FFFF_FFFF, 32),
    ];

    // Stop as soon as a single group covers the whole integer; going further would shift by
    // at least the bit width of the type.
    let step_count =
        if value_type.is_u8(&self.cx) {
            3
        }
        else if value_type.is_u16(&self.cx) {
            4
        }
        else if value_type.is_u32(&self.cx) {
            5
        }
        else {
            steps.len()
        };

    let mut count = value;
    for &(mask, shift) in &steps[..step_count] {
        let mask = self.context.new_rvalue_from_long(value_type, mask);
        let shift = self.context.new_rvalue_from_int(value_type, shift);
        // Add the low group of each pair to the high group of the same pair.
        count = (count & mask) + ((count >> shift) & mask);
    }

    count
}
875907

876908
// Algorithm from: https://blog.regehr.org/archives/1063

0 commit comments

Comments
 (0)