Skip to content
This repository was archived by the owner on May 28, 2025. It is now read-only.

Commit 63646b1

Browse files
committed
Implement llvm.x86.avx2.pmovmskb llvm intrinsic
1 parent 48a6b58 commit 63646b1

File tree

2 files changed

+31
-5
lines changed

2 files changed

+31
-5
lines changed

example/std_example.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ unsafe fn test_simd() {
6565
assert_eq!(std::mem::transmute::<_, [u16; 8]>(cmp_lt), [0, 0, 0, 0, 0, 0, 0, 0]);
6666

6767
test_mm_slli_si128();
68+
test_mm_movemask_epi8();
69+
test_mm256_movemask_epi8();
6870
}
6971

7072
#[target_feature(enable = "sse2")]
@@ -109,6 +111,31 @@ unsafe fn test_mm_slli_si128() {
109111
assert_eq_m128i(r, _mm_set1_epi8(0));
110112
}
111113

114+
/// Checks `_mm_movemask_epi8` against a hand-computed mask.
///
/// Arguments 0, 2, 5, 10, 13 and 15 of the input vector have their most
/// significant bit set; the expected result has exactly bits 0, 2, 5, 10, 13
/// and 15 set.
#[target_feature(enable = "sse2")]
115+
unsafe fn test_mm_movemask_epi8() {
116+
use std::arch::x86_64::*;
117+
118+
// The bytes written as `u8` literals and cast to `i8` are the ones whose top
// bit is set; all other bytes have a clear top bit.
#[rustfmt::skip]
119+
let a = _mm_setr_epi8(
120+
0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
121+
0b0101, 0b1111_0000u8 as i8, 0, 0,
122+
0, 0, 0b1111_0000u8 as i8, 0b0101,
123+
0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
124+
);
125+
let r = _mm_movemask_epi8(a);
126+
// Read the literal right to left: bit i is set exactly when argument i above
// has its top bit set.
assert_eq!(r, 0b10100100_00100101);
127+
}
128+
129+
/// Checks `_mm256_movemask_epi8` on a vector whose bytes are all `-1`.
#[target_feature(enable = "avx2")]
130+
unsafe fn test_mm256_movemask_epi8() {
131+
use std::arch::x86_64::*;
132+
133+
let a = _mm256_set1_epi8(-1);
134+
let r = _mm256_movemask_epi8(a);
135+
// Every input byte has its top bit set, so every mask bit is expected to be
// set, which is `-1` as a signed integer.
let e = -1;
136+
assert_eq!(r, e);
137+
}
138+
112139
fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) {
113140
unsafe {
114141
assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(x));

src/llvm_intrinsics.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,15 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>(
3333
crate::trap::trap_unimplemented(fx, intrinsic);
3434
};
3535

36-
// Used by _mm_movemask_epi8
37-
llvm.x86.sse2.pmovmskb.128, (c a) {
36+
// Used by `_mm_movemask_epi8` and `_mm256_movemask_epi8`
37+
llvm.x86.sse2.pmovmskb.128 | llvm.x86.avx2.pmovmskb, (c a) {
3838
let (lane_layout, lane_count) = crate::intrinsics::lane_type_and_count(fx, a.layout(), intrinsic);
3939
assert_eq!(lane_layout.ty.sty, fx.tcx.types.i8.sty);
40-
assert_eq!(lane_count, 16);
40+
assert!(lane_count == 16 || lane_count == 32);
4141

4242
let mut res = fx.bcx.ins().iconst(types::I32, 0);
4343

44-
for lane in 0..16 {
44+
for lane in 0..lane_count {
4545
let a_lane = a.value_field(fx, mir::Field::new(lane.try_into().unwrap())).load_scalar(fx);
4646
let a_lane_sign = fx.bcx.ins().ushr_imm(a_lane, 7); // extract sign bit of 8bit int
4747
let a_lane_sign = fx.bcx.ins().uextend(types::I32, a_lane_sign);
@@ -65,6 +65,5 @@ pub fn codegen_llvm_intrinsic_call<'a, 'tcx: 'a>(
6565
// llvm.x86.avx2.vperm2i128
6666
// llvm.x86.ssse3.pshuf.b.128
6767
// llvm.x86.avx2.pshuf.b
68-
// llvm.x86.avx2.pmovmskb
6968
// llvm.x86.avx2.psrli.w
7069
// llvm.x86.sse2.psrli.w

0 commit comments

Comments
 (0)