Skip to content

Commit 55e6ca1

Browse files
authored
Convert _mm_alignr_epi8 to const generics (#1061)
1 parent 9ef0581 commit 55e6ca1

File tree

2 files changed

+46
-62
lines changed

2 files changed

+46
-62
lines changed

crates/core_arch/src/x86/avx512bw.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9140,7 +9140,7 @@ pub unsafe fn _mm_mask_alignr_epi8<const IMM8: i32>(
91409140
b: __m128i,
91419141
) -> __m128i {
91429142
static_assert_imm8!(IMM8);
9143-
let r = _mm_alignr_epi8(a, b, IMM8);
9143+
let r = _mm_alignr_epi8::<IMM8>(a, b);
91449144
transmute(simd_select_bitmask(k, r.as_i8x16(), src.as_i8x16()))
91459145
}
91469146

@@ -9157,7 +9157,7 @@ pub unsafe fn _mm_maskz_alignr_epi8<const IMM8: i32>(
91579157
b: __m128i,
91589158
) -> __m128i {
91599159
static_assert_imm8!(IMM8);
9160-
let r = _mm_alignr_epi8(a, b, IMM8);
9160+
let r = _mm_alignr_epi8::<IMM8>(a, b);
91619161
let zero = _mm_setzero_si128().as_i8x16();
91629162
transmute(simd_select_bitmask(k, r.as_i8x16(), zero))
91639163
}

crates/core_arch/src/x86/ssse3.rs

Lines changed: 44 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -86,71 +86,55 @@ pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
8686
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8)
8787
#[inline]
8888
#[target_feature(enable = "ssse3")]
89-
#[cfg_attr(test, assert_instr(palignr, n = 15))]
90-
#[rustc_args_required_const(2)]
89+
#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
90+
#[rustc_legacy_const_generics(2)]
9191
#[stable(feature = "simd_x86", since = "1.27.0")]
92-
pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i {
93-
let n = n as u32;
92+
pub unsafe fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
93+
static_assert_imm8!(IMM8);
9494
// If palignr is shifting the pair of vectors more than the size of two
9595
// lanes, emit zero.
96-
if n > 32 {
96+
if IMM8 > 32 {
9797
return _mm_set1_epi8(0);
9898
}
9999
// If palignr is shifting the pair of input vectors more than one lane,
100100
// but less than two lanes, convert to shifting in zeroes.
101-
let (a, b, n) = if n > 16 {
102-
(_mm_set1_epi8(0), a, n - 16)
101+
let (a, b) = if IMM8 > 16 {
102+
(_mm_set1_epi8(0), a)
103103
} else {
104-
(a, b, n)
104+
(a, b)
105105
};
106-
let a = a.as_i8x16();
107-
let b = b.as_i8x16();
108-
109-
macro_rules! shuffle {
110-
($shift:expr) => {
111-
simd_shuffle16(
112-
b,
113-
a,
114-
[
115-
0 + $shift,
116-
1 + $shift,
117-
2 + $shift,
118-
3 + $shift,
119-
4 + $shift,
120-
5 + $shift,
121-
6 + $shift,
122-
7 + $shift,
123-
8 + $shift,
124-
9 + $shift,
125-
10 + $shift,
126-
11 + $shift,
127-
12 + $shift,
128-
13 + $shift,
129-
14 + $shift,
130-
15 + $shift,
131-
],
132-
)
133-
};
106+
const fn mask(shift: u32, i: u32) -> u32 {
107+
if shift > 32 {
108+
// Unused: the caller has already returned zero when IMM8 > 32, but the shuffle still needs a valid index here.
109+
i
110+
} else if shift > 16 {
111+
shift - 16 + i
112+
} else {
113+
shift + i
114+
}
134115
}
135-
let r: i8x16 = match n {
136-
0 => shuffle!(0),
137-
1 => shuffle!(1),
138-
2 => shuffle!(2),
139-
3 => shuffle!(3),
140-
4 => shuffle!(4),
141-
5 => shuffle!(5),
142-
6 => shuffle!(6),
143-
7 => shuffle!(7),
144-
8 => shuffle!(8),
145-
9 => shuffle!(9),
146-
10 => shuffle!(10),
147-
11 => shuffle!(11),
148-
12 => shuffle!(12),
149-
13 => shuffle!(13),
150-
14 => shuffle!(14),
151-
15 => shuffle!(15),
152-
_ => shuffle!(16),
153-
};
116+
let r: i8x16 = simd_shuffle16(
117+
b.as_i8x16(),
118+
a.as_i8x16(),
119+
[
120+
mask(IMM8 as u32, 0),
121+
mask(IMM8 as u32, 1),
122+
mask(IMM8 as u32, 2),
123+
mask(IMM8 as u32, 3),
124+
mask(IMM8 as u32, 4),
125+
mask(IMM8 as u32, 5),
126+
mask(IMM8 as u32, 6),
127+
mask(IMM8 as u32, 7),
128+
mask(IMM8 as u32, 8),
129+
mask(IMM8 as u32, 9),
130+
mask(IMM8 as u32, 10),
131+
mask(IMM8 as u32, 11),
132+
mask(IMM8 as u32, 12),
133+
mask(IMM8 as u32, 13),
134+
mask(IMM8 as u32, 14),
135+
mask(IMM8 as u32, 15),
136+
],
137+
);
154138
transmute(r)
155139
}
156140

@@ -404,29 +388,29 @@ mod tests {
404388
12, 5, 5, 10,
405389
4, 1, 8, 0,
406390
);
407-
let r = _mm_alignr_epi8(a, b, 33);
391+
let r = _mm_alignr_epi8::<33>(a, b);
408392
assert_eq_m128i(r, _mm_set1_epi8(0));
409393

410-
let r = _mm_alignr_epi8(a, b, 17);
394+
let r = _mm_alignr_epi8::<17>(a, b);
411395
#[rustfmt::skip]
412396
let expected = _mm_setr_epi8(
413397
2, 3, 4, 5, 6, 7, 8, 9,
414398
10, 11, 12, 13, 14, 15, 16, 0,
415399
);
416400
assert_eq_m128i(r, expected);
417401

418-
let r = _mm_alignr_epi8(a, b, 16);
402+
let r = _mm_alignr_epi8::<16>(a, b);
419403
assert_eq_m128i(r, a);
420404

421-
let r = _mm_alignr_epi8(a, b, 15);
405+
let r = _mm_alignr_epi8::<15>(a, b);
422406
#[rustfmt::skip]
423407
let expected = _mm_setr_epi8(
424408
0, 1, 2, 3, 4, 5, 6, 7,
425409
8, 9, 10, 11, 12, 13, 14, 15,
426410
);
427411
assert_eq_m128i(r, expected);
428412

429-
let r = _mm_alignr_epi8(a, b, 0);
413+
let r = _mm_alignr_epi8::<0>(a, b);
430414
assert_eq_m128i(r, b);
431415
}
432416

0 commit comments

Comments
 (0)