Skip to content

Commit 017ba77

Browse files
folkertdev authored and Amanieu committed
add vec_perm
1 parent a04b8e2 commit 017ba77

File tree

1 file changed

+139
-0
lines changed

1 file changed

+139
-0
lines changed

crates/core_arch/src/s390x/vector.rs

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ unsafe extern "unadjusted" {
100100
#[link_name = "llvm.s390.verimh"] fn verimh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short, d: i32) -> vector_signed_short;
101101
#[link_name = "llvm.s390.verimf"] fn verimf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int, d: i32) -> vector_signed_int;
102102
#[link_name = "llvm.s390.verimg"] fn verimg(a: vector_signed_long_long, b: vector_signed_long_long, c: vector_signed_long_long, d: i32) -> vector_signed_long_long;
103+
104+
// Binding to the LLVM intrinsic `llvm.s390.vperm`. Takes two source vectors (`a`, `b`) and a
// byte vector `c`, and returns a vector of the same type as the sources. It is only declared for
// `vector_signed_char`; the `VectorPerm` impls transmute other vector types to and from that type.
// NOTE(review): presumably lowers to the s390x VECTOR PERMUTE instruction — confirm against LLVM.
#[link_name = "llvm.s390.vperm"] fn vperm(a: vector_signed_char, b: vector_signed_char, c: vector_unsigned_char) -> vector_signed_char;
103105
}
104106

105107
impl_from! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -1256,6 +1258,43 @@ mod sealed {
12561258
vector_signed_int, vmrlf, vmrhf,
12571259
vector_signed_long_long, vmrlg, vmrhg
12581260
}
1261+
1262+
#[unstable(feature = "stdarch_s390x", issue = "135681")]
1263+
pub trait VectorPerm {
1264+
unsafe fn vec_perm(self, other: Self, c: vector_unsigned_char) -> Self;
1265+
}
1266+
1267+
macro_rules! impl_merge {
1268+
($($ty:ident),*) => {
1269+
$(
1270+
#[unstable(feature = "stdarch_s390x", issue = "135681")]
1271+
impl VectorPerm for $ty {
1272+
#[inline]
1273+
#[target_feature(enable = "vector")]
1274+
unsafe fn vec_perm(self, other: Self, c: vector_unsigned_char) -> Self {
1275+
transmute(vperm(transmute(self), transmute(other), c))
1276+
}
1277+
}
1278+
)*
1279+
}
1280+
}
1281+
1282+
impl_merge! {
1283+
vector_signed_char,
1284+
vector_signed_short,
1285+
vector_signed_int,
1286+
vector_signed_long_long,
1287+
vector_unsigned_char,
1288+
vector_unsigned_short,
1289+
vector_unsigned_int,
1290+
vector_unsigned_long_long,
1291+
vector_bool_char,
1292+
vector_bool_short,
1293+
vector_bool_int,
1294+
vector_bool_long_long,
1295+
vector_float,
1296+
vector_double
1297+
}
12591298
}
12601299

12611300
/// Vector element-wise addition.
@@ -1806,6 +1845,16 @@ pub unsafe fn vec_genmasks_64<const L: u8, const H: u8>() -> vector_unsigned_lon
18061845
vector_unsigned_long_long(const { [genmasks(u64::BITS, L, H); 2] })
18071846
}
18081847

1848+
/// Returns a vector that contains some elements of two vectors, in the order specified by a third vector.
1849+
/// Each byte of the result is selected by using the least significant 5 bits of the corresponding byte of c as an index into the concatenated bytes of a and b.
1850+
/// Note: The vector generate mask built-in function [`vec_genmask`] could help generate the mask c.
1851+
#[inline]
1852+
#[target_feature(enable = "vector")]
1853+
#[unstable(feature = "stdarch_s390x", issue = "135681")]
1854+
pub unsafe fn vec_perm<T: sealed::VectorPerm>(a: T, b: T, c: vector_unsigned_char) -> T {
1855+
a.vec_perm(b, c)
1856+
}
1857+
18091858
#[cfg(test)]
18101859
mod tests {
18111860
use super::*;
@@ -2228,4 +2277,94 @@ mod tests {
22282277
[0x00000000, 0x11111111, 0x22222222, 0x33333333],
22292278
[0xCCCCCCCC, 0x22222222, 0xDDDDDDDD, 0x33333333]
22302279
}
2280+
2281+
// Generates one `vec_perm` test case.
//
// Arguments: the test name, the portable SIMD type used to write the literals, the s390x
// vector type handed to `vec_perm`, and four bracketed lists: first source, second source,
// selector bytes (always sixteen `u8` values), and the expected result.
macro_rules! test_vec_perm {
    {$name:ident,
     $shorttype:ident, $longtype:ident,
     [$($lhs:expr),+], [$($rhs:expr),+], [$($sel:expr),+], [$($want:expr),+]} => {
        #[simd_test(enable = "vector")]
        unsafe fn $name() {
            let first: $longtype = transmute($shorttype::new($($lhs),+));
            let second: $longtype = transmute($shorttype::new($($rhs),+));
            let selector: vector_unsigned_char = transmute(u8x16::new($($sel),+));
            let expected = $shorttype::new($($want),+);

            // Convert back to the portable type so the result can be compared and printed.
            let actual: $shorttype = transmute(vec_perm(first, second, selector));
            assert_eq!(expected, actual);
        }
    }
}
2297+
2298+
test_vec_perm! {test_vec_perm_u8x16,
2299+
u8x16, vector_unsigned_char,
2300+
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
2301+
[100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115],
2302+
[0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
2303+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
2304+
[0, 1, 100, 101, 2, 3, 102, 103, 4, 5, 104, 105, 6, 7, 106, 107]}
2305+
test_vec_perm! {test_vec_perm_i8x16,
2306+
i8x16, vector_signed_char,
2307+
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
2308+
[100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115],
2309+
[0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
2310+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
2311+
[0, 1, 100, 101, 2, 3, 102, 103, 4, 5, 104, 105, 6, 7, 106, 107]}
2312+
2313+
test_vec_perm! {test_vec_perm_m8x16,
2314+
m8x16, vector_bool_char,
2315+
[false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false],
2316+
[true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true],
2317+
[0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
2318+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
2319+
[false, false, true, true, false, false, true, true, false, false, true, true, false, false, true, true]}
2320+
test_vec_perm! {test_vec_perm_u16x8,
2321+
u16x8, vector_unsigned_short,
2322+
[0, 1, 2, 3, 4, 5, 6, 7],
2323+
[10, 11, 12, 13, 14, 15, 16, 17],
2324+
[0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
2325+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
2326+
[0, 10, 1, 11, 2, 12, 3, 13]}
2327+
test_vec_perm! {test_vec_perm_i16x8,
2328+
i16x8, vector_signed_short,
2329+
[0, 1, 2, 3, 4, 5, 6, 7],
2330+
[10, 11, 12, 13, 14, 15, 16, 17],
2331+
[0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
2332+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
2333+
[0, 10, 1, 11, 2, 12, 3, 13]}
2334+
test_vec_perm! {test_vec_perm_m16x8,
2335+
m16x8, vector_bool_short,
2336+
[false, false, false, false, false, false, false, false],
2337+
[true, true, true, true, true, true, true, true],
2338+
[0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
2339+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
2340+
[false, true, false, true, false, true, false, true]}
2341+
2342+
test_vec_perm! {test_vec_perm_u32x4,
2343+
u32x4, vector_unsigned_int,
2344+
[0, 1, 2, 3],
2345+
[10, 11, 12, 13],
2346+
[0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
2347+
0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17],
2348+
[0, 10, 1, 11]}
2349+
test_vec_perm! {test_vec_perm_i32x4,
2350+
i32x4, vector_signed_int,
2351+
[0, 1, 2, 3],
2352+
[10, 11, 12, 13],
2353+
[0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
2354+
0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17],
2355+
[0, 10, 1, 11]}
2356+
test_vec_perm! {test_vec_perm_m32x4,
2357+
m32x4, vector_bool_int,
2358+
[false, false, false, false],
2359+
[true, true, true, true],
2360+
[0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
2361+
0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17],
2362+
[false, true, false, true]}
2363+
test_vec_perm! {test_vec_perm_f32x4,
2364+
f32x4, vector_float,
2365+
[0.0, 1.0, 2.0, 3.0],
2366+
[1.0, 1.1, 1.2, 1.3],
2367+
[0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
2368+
0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17],
2369+
[0.0, 1.0, 1.0, 1.1]}
22312370
}

0 commit comments

Comments
 (0)