Skip to content

Commit ade3739

Browse files
authored
Merge pull request rust-lang#257 from arpankapoor/master
Optimize bitreverse codegen
2 parents 08a6d6e + 68b8500 commit ade3739

File tree

3 files changed

+39
-124
lines changed

3 files changed

+39
-124
lines changed

example/mini_core_hello_world.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -168,6 +168,9 @@ fn main() {
168168
world as Box<dyn SomeTrait>;
169169

170170
assert_eq!(intrinsics::bitreverse(0b10101000u8), 0b00010101u8);
171+
assert_eq!(intrinsics::bitreverse(0xddccu16), 0x33bbu16);
172+
assert_eq!(intrinsics::bitreverse(0xffee_ddccu32), 0x33bb77ffu32);
173+
assert_eq!(intrinsics::bitreverse(0x1234_5678_ffee_ddccu64), 0x33bb77ff1e6a2c48u64);
171174

172175
assert_eq!(intrinsics::bswap(0xabu8), 0xabu8);
173176
assert_eq!(intrinsics::bswap(0xddccu16), 0xccddu16);

example/std_example.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@ fn main() {
5858

5959
assert_eq!(0b0000000000000000000000000010000010000000000000000000000000000000_0000000000100000000000000000000000001000000000000100000000000000u128.leading_zeros(), 26);
6060
assert_eq!(0b0000000000000000000000000010000000000000000000000000000000000000_0000000000000000000000000000000000001000000000000000000010000000u128.trailing_zeros(), 7);
61+
assert_eq!(0x1234_5678_ffee_ddcc_1234_5678_ffee_ddccu128.reverse_bits(), 0x33bb77ff1e6a2c4833bb77ff1e6a2c48u128);
6162

6263
let _d = 0i128.checked_div(2i128);
6364
let _d = 0u128.checked_div(2u128);

src/intrinsic/mod.rs

Lines changed: 35 additions & 124 deletions
Original file line number | Diff line number | Diff line change
@@ -549,141 +549,52 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
549549
let context = &self.cx.context;
550550
let result =
551551
match width {
552-
8 => {
553-
// First step.
554-
let left = self.and(value, context.new_rvalue_from_int(typ, 0xF0));
555-
let left = self.lshr(left, context.new_rvalue_from_int(typ, 4));
556-
let right = self.and(value, context.new_rvalue_from_int(typ, 0x0F));
557-
let right = self.shl(right, context.new_rvalue_from_int(typ, 4));
558-
let step1 = self.or(left, right);
559-
560-
// Second step.
561-
let left = self.and(step1, context.new_rvalue_from_int(typ, 0xCC));
562-
let left = self.lshr(left, context.new_rvalue_from_int(typ, 2));
563-
let right = self.and(step1, context.new_rvalue_from_int(typ, 0x33));
564-
let right = self.shl(right, context.new_rvalue_from_int(typ, 2));
565-
let step2 = self.or(left, right);
566-
567-
// Third step.
568-
let left = self.and(step2, context.new_rvalue_from_int(typ, 0xAA));
569-
let left = self.lshr(left, context.new_rvalue_from_int(typ, 1));
570-
let right = self.and(step2, context.new_rvalue_from_int(typ, 0x55));
571-
let right = self.shl(right, context.new_rvalue_from_int(typ, 1));
572-
let step3 = self.or(left, right);
573-
574-
step3
575-
},
576-
16 => {
577-
// First step.
578-
let left = self.and(value, context.new_rvalue_from_int(typ, 0x5555));
579-
let left = self.shl(left, context.new_rvalue_from_int(typ, 1));
580-
let right = self.and(value, context.new_rvalue_from_int(typ, 0xAAAA));
581-
let right = self.lshr(right, context.new_rvalue_from_int(typ, 1));
582-
let step1 = self.or(left, right);
583-
584-
// Second step.
585-
let left = self.and(step1, context.new_rvalue_from_int(typ, 0x3333));
586-
let left = self.shl(left, context.new_rvalue_from_int(typ, 2));
587-
let right = self.and(step1, context.new_rvalue_from_int(typ, 0xCCCC));
588-
let right = self.lshr(right, context.new_rvalue_from_int(typ, 2));
589-
let step2 = self.or(left, right);
590-
591-
// Third step.
592-
let left = self.and(step2, context.new_rvalue_from_int(typ, 0x0F0F));
593-
let left = self.shl(left, context.new_rvalue_from_int(typ, 4));
594-
let right = self.and(step2, context.new_rvalue_from_int(typ, 0xF0F0));
595-
let right = self.lshr(right, context.new_rvalue_from_int(typ, 4));
596-
let step3 = self.or(left, right);
597-
598-
// Fourth step.
599-
let left = self.and(step3, context.new_rvalue_from_int(typ, 0x00FF));
600-
let left = self.shl(left, context.new_rvalue_from_int(typ, 8));
601-
let right = self.and(step3, context.new_rvalue_from_int(typ, 0xFF00));
602-
let right = self.lshr(right, context.new_rvalue_from_int(typ, 8));
603-
let step4 = self.or(left, right);
552+
8 | 16 | 32 | 64 => {
553+
let mask = ((1u128 << width) - 1) as u64;
554+
let (m0, m1, m2) = if width > 16 {
555+
(
556+
context.new_rvalue_from_long(typ, (0x5555555555555555u64 & mask) as i64),
557+
context.new_rvalue_from_long(typ, (0x3333333333333333u64 & mask) as i64),
558+
context.new_rvalue_from_long(typ, (0x0f0f0f0f0f0f0f0fu64 & mask) as i64),
559+
)
560+
} else {
561+
(
562+
context.new_rvalue_from_int(typ, (0x5555u64 & mask) as i32),
563+
context.new_rvalue_from_int(typ, (0x3333u64 & mask) as i32),
564+
context.new_rvalue_from_int(typ, (0x0f0fu64 & mask) as i32),
565+
)
566+
};
567+
let one = context.new_rvalue_from_int(typ, 1);
568+
let two = context.new_rvalue_from_int(typ, 2);
569+
let four = context.new_rvalue_from_int(typ, 4);
604570

605-
step4
606-
},
607-
32 => {
608-
// TODO(antoyo): Refactor with other implementations.
609571
// First step.
610-
let left = self.and(value, context.new_rvalue_from_long(typ, 0x55555555));
611-
let left = self.shl(left, context.new_rvalue_from_long(typ, 1));
612-
let right = self.and(value, context.new_rvalue_from_long(typ, 0xAAAAAAAA));
613-
let right = self.lshr(right, context.new_rvalue_from_long(typ, 1));
572+
let left = self.lshr(value, one);
573+
let left = self.and(left, m0);
574+
let right = self.and(value, m0);
575+
let right = self.shl(right, one);
614576
let step1 = self.or(left, right);
615577

616578
// Second step.
617-
let left = self.and(step1, context.new_rvalue_from_long(typ, 0x33333333));
618-
let left = self.shl(left, context.new_rvalue_from_long(typ, 2));
619-
let right = self.and(step1, context.new_rvalue_from_long(typ, 0xCCCCCCCC));
620-
let right = self.lshr(right, context.new_rvalue_from_long(typ, 2));
579+
let left = self.lshr(step1, two);
580+
let left = self.and(left, m1);
581+
let right = self.and(step1, m1);
582+
let right = self.shl(right, two);
621583
let step2 = self.or(left, right);
622584

623585
// Third step.
624-
let left = self.and(step2, context.new_rvalue_from_long(typ, 0x0F0F0F0F));
625-
let left = self.shl(left, context.new_rvalue_from_long(typ, 4));
626-
let right = self.and(step2, context.new_rvalue_from_long(typ, 0xF0F0F0F0));
627-
let right = self.lshr(right, context.new_rvalue_from_long(typ, 4));
586+
let left = self.lshr(step2, four);
587+
let left = self.and(left, m2);
588+
let right = self.and(step2, m2);
589+
let right = self.shl(right, four);
628590
let step3 = self.or(left, right);
629591

630592
// Fourth step.
631-
let left = self.and(step3, context.new_rvalue_from_long(typ, 0x00FF00FF));
632-
let left = self.shl(left, context.new_rvalue_from_long(typ, 8));
633-
let right = self.and(step3, context.new_rvalue_from_long(typ, 0xFF00FF00));
634-
let right = self.lshr(right, context.new_rvalue_from_long(typ, 8));
635-
let step4 = self.or(left, right);
636-
637-
// Fifth step.
638-
let left = self.and(step4, context.new_rvalue_from_long(typ, 0x0000FFFF));
639-
let left = self.shl(left, context.new_rvalue_from_long(typ, 16));
640-
let right = self.and(step4, context.new_rvalue_from_long(typ, 0xFFFF0000));
641-
let right = self.lshr(right, context.new_rvalue_from_long(typ, 16));
642-
let step5 = self.or(left, right);
643-
644-
step5
645-
},
646-
64 => {
647-
// First step.
648-
let left = self.shl(value, context.new_rvalue_from_long(typ, 32));
649-
let right = self.lshr(value, context.new_rvalue_from_long(typ, 32));
650-
let step1 = self.or(left, right);
651-
652-
// Second step.
653-
let left = self.and(step1, context.new_rvalue_from_long(typ, 0x0001FFFF0001FFFF));
654-
let left = self.shl(left, context.new_rvalue_from_long(typ, 15));
655-
let right = self.and(step1, context.new_rvalue_from_long(typ, 0xFFFE0000FFFE0000u64 as i64)); // TODO(antoyo): transmute the number instead?
656-
let right = self.lshr(right, context.new_rvalue_from_long(typ, 17));
657-
let step2 = self.or(left, right);
658-
659-
// Third step.
660-
let left = self.lshr(step2, context.new_rvalue_from_long(typ, 10));
661-
let left = self.xor(step2, left);
662-
let temp = self.and(left, context.new_rvalue_from_long(typ, 0x003F801F003F801F));
663-
664-
let left = self.shl(temp, context.new_rvalue_from_long(typ, 10));
665-
let left = self.or(temp, left);
666-
let step3 = self.xor(left, step2);
667-
668-
// Fourth step.
669-
let left = self.lshr(step3, context.new_rvalue_from_long(typ, 4));
670-
let left = self.xor(step3, left);
671-
let temp = self.and(left, context.new_rvalue_from_long(typ, 0x0E0384210E038421));
672-
673-
let left = self.shl(temp, context.new_rvalue_from_long(typ, 4));
674-
let left = self.or(temp, left);
675-
let step4 = self.xor(left, step3);
676-
677-
// Fifth step.
678-
let left = self.lshr(step4, context.new_rvalue_from_long(typ, 2));
679-
let left = self.xor(step4, left);
680-
let temp = self.and(left, context.new_rvalue_from_long(typ, 0x2248884222488842));
681-
682-
let left = self.shl(temp, context.new_rvalue_from_long(typ, 2));
683-
let left = self.or(temp, left);
684-
let step5 = self.xor(left, step4);
685-
686-
step5
593+
if width == 8 {
594+
step3
595+
} else {
596+
self.gcc_bswap(step3, width)
597+
}
687598
},
688599
128 => {
689600
// TODO(antoyo): find a more efficient implementation?

0 commit comments

Comments
 (0)