Skip to content

Commit eda93d1

Browse files
committed
Add vrshl, vrshr, vrshrn, vrsra, vsra neon instructions
1 parent 796bfdf commit eda93d1

File tree

5 files changed

+6272
-4085
lines changed

5 files changed

+6272
-4085
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4605,6 +4605,160 @@ pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
46054605
transmute(a)
46064606
}
46074607

4608+
/// Signed rounding shift left
///
/// Scalar `i64` variant: forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.srshl.i64` and returns whatever it produces.
4609+
#[inline]
4610+
#[target_feature(enable = "neon")]
4611+
#[cfg_attr(test, assert_instr(srshl))]
4612+
pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
4613+
#[allow(improper_ctypes)]
4614+
extern "C" {
4615+
// On aarch64 this declaration is bound to the LLVM intrinsic by name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.i64")]
4616+
fn vrshld_s64_(a: i64, b: i64) -> i64;
4617+
}
4618+
vrshld_s64_(a, b)
4619+
}
4620+
4621+
/// Unsigned rounding shift left
///
/// Scalar `u64` variant: forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.urshl.i64` and returns whatever it produces. Note that
/// `b` is a signed `i64` even though the shifted value is unsigned.
4622+
#[inline]
4623+
#[target_feature(enable = "neon")]
4624+
#[cfg_attr(test, assert_instr(urshl))]
4625+
pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 {
4626+
#[allow(improper_ctypes)]
4627+
extern "C" {
4628+
// On aarch64 this declaration is bound to the LLVM intrinsic by name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.i64")]
4629+
fn vrshld_u64_(a: u64, b: i64) -> u64;
4630+
}
4631+
vrshld_u64_(a, b)
4632+
}
4633+
4634+
/// Signed rounding shift right
///
/// Shifts the scalar `a` by the compile-time constant `N`, which is statically
/// checked to lie in `1..=64`. Implemented by delegating to `vrshld_s64` with
/// the negated shift amount.
4635+
#[inline]
4636+
#[target_feature(enable = "neon")]
4637+
#[cfg_attr(test, assert_instr(srshr, N = 2))]
4638+
#[rustc_legacy_const_generics(1)]
4639+
pub unsafe fn vrshrd_n_s64<const N: i32>(a: i64) -> i64 {
4640+
static_assert!(N : i32 where N >= 1 && N <= 64);
4641+
// The right shift by `N` is expressed as a rounding left shift by `-N`.
vrshld_s64(a, -N as i64)
4642+
}
4643+
4644+
/// Unsigned rounding shift right
///
/// Shifts the scalar `a` by the compile-time constant `N`, which is statically
/// checked to lie in `1..=64`. Implemented by delegating to `vrshld_u64` with
/// the negated shift amount.
4645+
#[inline]
4646+
#[target_feature(enable = "neon")]
4647+
#[cfg_attr(test, assert_instr(urshr, N = 2))]
4648+
#[rustc_legacy_const_generics(1)]
4649+
pub unsafe fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
4650+
static_assert!(N : i32 where N >= 1 && N <= 64);
4651+
// The right shift by `N` is expressed as a rounding left shift by `-N`.
vrshld_u64(a, -N as i64)
4652+
}
4653+
4654+
/// Rounding shift right narrow
///
/// Passes `b` through `vrshrn_n_s16::<N>` (`N` is statically checked to lie in
/// `1..=8`) and combines the result with `a` into one 16-lane vector: the
/// lanes of `a` come first, followed by the narrowed lanes of `b`.
4655+
#[inline]
4656+
#[target_feature(enable = "neon")]
4657+
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
4658+
#[rustc_legacy_const_generics(2)]
4659+
pub unsafe fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
4660+
static_assert!(N : i32 where N >= 1 && N <= 8);
4661+
// Identity index list over both inputs: `a` first, then the narrowed `b`.
simd_shuffle16(a, vrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
4662+
}
4663+
4664+
/// Rounding shift right narrow
///
/// Passes `b` through `vrshrn_n_s32::<N>` (`N` is statically checked to lie in
/// `1..=16`) and combines the result with `a` into one 8-lane vector: the
/// lanes of `a` come first, followed by the narrowed lanes of `b`.
4665+
#[inline]
4666+
#[target_feature(enable = "neon")]
4667+
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
4668+
#[rustc_legacy_const_generics(2)]
4669+
pub unsafe fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
4670+
static_assert!(N : i32 where N >= 1 && N <= 16);
4671+
// Identity index list over both inputs: `a` first, then the narrowed `b`.
simd_shuffle8(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
4672+
}
4673+
4674+
/// Rounding shift right narrow
///
/// Passes `b` through `vrshrn_n_s64::<N>` (`N` is statically checked to lie in
/// `1..=32`) and combines the result with `a` into one 4-lane vector: the
/// lanes of `a` come first, followed by the narrowed lanes of `b`.
4675+
#[inline]
4676+
#[target_feature(enable = "neon")]
4677+
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
4678+
#[rustc_legacy_const_generics(2)]
4679+
pub unsafe fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
4680+
static_assert!(N : i32 where N >= 1 && N <= 32);
4681+
// Identity index list over both inputs: `a` first, then the narrowed `b`.
simd_shuffle4(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3])
4682+
}
4683+
4684+
/// Rounding shift right narrow
///
/// Passes `b` through `vrshrn_n_u16::<N>` (`N` is statically checked to lie in
/// `1..=8`) and combines the result with `a` into one 16-lane vector: the
/// lanes of `a` come first, followed by the narrowed lanes of `b`.
4685+
#[inline]
4686+
#[target_feature(enable = "neon")]
4687+
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
4688+
#[rustc_legacy_const_generics(2)]
4689+
pub unsafe fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
4690+
static_assert!(N : i32 where N >= 1 && N <= 8);
4691+
// Identity index list over both inputs: `a` first, then the narrowed `b`.
simd_shuffle16(a, vrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
4692+
}
4693+
4694+
/// Rounding shift right narrow
///
/// Passes `b` through `vrshrn_n_u32::<N>` (`N` is statically checked to lie in
/// `1..=16`) and combines the result with `a` into one 8-lane vector: the
/// lanes of `a` come first, followed by the narrowed lanes of `b`.
4695+
#[inline]
4696+
#[target_feature(enable = "neon")]
4697+
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
4698+
#[rustc_legacy_const_generics(2)]
4699+
pub unsafe fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
4700+
static_assert!(N : i32 where N >= 1 && N <= 16);
4701+
// Identity index list over both inputs: `a` first, then the narrowed `b`.
simd_shuffle8(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
4702+
}
4703+
4704+
/// Rounding shift right narrow
///
/// Passes `b` through `vrshrn_n_u64::<N>` (`N` is statically checked to lie in
/// `1..=32`) and combines the result with `a` into one 4-lane vector: the
/// lanes of `a` come first, followed by the narrowed lanes of `b`.
4705+
#[inline]
4706+
#[target_feature(enable = "neon")]
4707+
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
4708+
#[rustc_legacy_const_generics(2)]
4709+
pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
4710+
static_assert!(N : i32 where N >= 1 && N <= 32);
4711+
// Identity index list over both inputs: `a` first, then the narrowed `b`.
simd_shuffle4(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3])
4712+
}
4713+
4714+
/// Signed rounding shift right and accumulate
///
/// Rounding-shifts the scalar `b` by the compile-time constant `N` (statically
/// checked to lie in `1..=64`) using the vector intrinsic `vrshr_n_s64`, adds
/// the result to `a` with `simd_add`, and returns the sum as a scalar.
4715+
#[inline]
4716+
#[target_feature(enable = "neon")]
4717+
#[cfg_attr(test, assert_instr(srsra, N = 2))]
4718+
#[rustc_legacy_const_generics(2)]
4719+
pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
4720+
static_assert!(N : i32 where N >= 1 && N <= 64);
4721+
// Reinterpret the scalar as an `int64x1_t` so the vector intrinsic can be reused.
let b: int64x1_t = vrshr_n_s64::<N>(transmute(b));
4722+
// Accumulate in vector form, then reinterpret the sum back to a scalar.
transmute(simd_add(transmute(a), b))
4723+
}
4724+
4725+
/// Unsigned rounding shift right and accumulate
///
/// Rounding-shifts the scalar `b` by the compile-time constant `N` (statically
/// checked to lie in `1..=64`) using the vector intrinsic `vrshr_n_u64`, adds
/// the result to `a` with `simd_add`, and returns the sum as a scalar.
4726+
#[inline]
4727+
#[target_feature(enable = "neon")]
4728+
#[cfg_attr(test, assert_instr(ursra, N = 2))]
4729+
#[rustc_legacy_const_generics(2)]
4730+
pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
4731+
static_assert!(N : i32 where N >= 1 && N <= 64);
4732+
// Reinterpret the scalar as a `uint64x1_t` so the vector intrinsic can be reused.
let b: uint64x1_t = vrshr_n_u64::<N>(transmute(b));
4733+
// Accumulate in vector form, then reinterpret the sum back to a scalar.
transmute(simd_add(transmute(a), b))
4734+
}
4735+
4736+
/// Signed shift left
///
/// Scalar `i64` variant: forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.sshl.i64` and returns whatever it produces.
4737+
#[inline]
4738+
#[target_feature(enable = "neon")]
4739+
#[cfg_attr(test, assert_instr(sshl))]
4740+
pub unsafe fn vshld_s64(a: i64, b: i64) -> i64 {
4741+
#[allow(improper_ctypes)]
4742+
extern "C" {
4743+
// On aarch64 this declaration is bound to the LLVM intrinsic by name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.i64")]
4744+
fn vshld_s64_(a: i64, b: i64) -> i64;
4745+
}
4746+
vshld_s64_(a, b)
4747+
}
4748+
4749+
/// Unsigned shift left
///
/// Scalar `u64` variant: forwards `a` and `b` unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.ushl.i64` and returns whatever it produces. Note that
/// `b` is a signed `i64` even though the shifted value is unsigned.
4750+
#[inline]
4751+
#[target_feature(enable = "neon")]
4752+
#[cfg_attr(test, assert_instr(ushl))]
4753+
pub unsafe fn vshld_u64(a: u64, b: i64) -> u64 {
4754+
#[allow(improper_ctypes)]
4755+
extern "C" {
4756+
// On aarch64 this declaration is bound to the LLVM intrinsic by name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.i64")]
4757+
fn vshld_u64_(a: u64, b: i64) -> u64;
4758+
}
4759+
vshld_u64_(a, b)
4760+
}
4761+
46084762
/// Signed shift left long
46094763
#[inline]
46104764
#[target_feature(enable = "neon")]
@@ -9872,6 +10026,130 @@ mod test {
987210026
assert_eq!(r, e);
987310027
}
987410028

10029+
// Expects `vrshld_s64(1, 2)` to return 4.
#[simd_test(enable = "neon")]
10030+
unsafe fn test_vrshld_s64() {
10031+
let a: i64 = 1;
10032+
let b: i64 = 2;
10033+
let e: i64 = 4;
10034+
let r: i64 = transmute(vrshld_s64(transmute(a), transmute(b)));
10035+
assert_eq!(r, e);
10036+
}
10037+
10038+
// Expects `vrshld_u64(1, 2)` to return 4.
#[simd_test(enable = "neon")]
10039+
unsafe fn test_vrshld_u64() {
10040+
let a: u64 = 1;
10041+
let b: i64 = 2;
10042+
let e: u64 = 4;
10043+
let r: u64 = transmute(vrshld_u64(transmute(a), transmute(b)));
10044+
assert_eq!(r, e);
10045+
}
10046+
10047+
// Expects `vrshrd_n_s64::<2>(4)` to return 1.
#[simd_test(enable = "neon")]
10048+
unsafe fn test_vrshrd_n_s64() {
10049+
let a: i64 = 4;
10050+
let e: i64 = 1;
10051+
let r: i64 = transmute(vrshrd_n_s64::<2>(transmute(a)));
10052+
assert_eq!(r, e);
10053+
}
10054+
10055+
// Expects `vrshrd_n_u64::<2>(4)` to return 1.
#[simd_test(enable = "neon")]
10056+
unsafe fn test_vrshrd_n_u64() {
10057+
let a: u64 = 4;
10058+
let e: u64 = 1;
10059+
let r: u64 = transmute(vrshrd_n_u64::<2>(transmute(a)));
10060+
assert_eq!(r, e);
10061+
}
10062+
10063+
// With N = 2, expects the lanes of `a` followed by the lanes of `b`
// rounding-shifted right by 2 (32..=60 step 4 becomes 8..=15).
#[simd_test(enable = "neon")]
10064+
unsafe fn test_vrshrn_high_n_s16() {
10065+
let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
10066+
let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
10067+
let e: i8x16 = i8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
10068+
let r: i8x16 = transmute(vrshrn_high_n_s16::<2>(transmute(a), transmute(b)));
10069+
assert_eq!(r, e);
10070+
}
10071+
10072+
// With N = 2, expects the lanes of `a` followed by the lanes of `b`
// rounding-shifted right by 2 (32, 36, 40, 44 becomes 8, 9, 10, 11).
#[simd_test(enable = "neon")]
10073+
unsafe fn test_vrshrn_high_n_s32() {
10074+
let a: i16x4 = i16x4::new(0, 1, 8, 9);
10075+
let b: i32x4 = i32x4::new(32, 36, 40, 44);
10076+
let e: i16x8 = i16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
10077+
let r: i16x8 = transmute(vrshrn_high_n_s32::<2>(transmute(a), transmute(b)));
10078+
assert_eq!(r, e);
10079+
}
10080+
10081+
// With N = 2, expects the lanes of `a` followed by the lanes of `b`
// rounding-shifted right by 2 (32, 36 becomes 8, 9).
#[simd_test(enable = "neon")]
10082+
unsafe fn test_vrshrn_high_n_s64() {
10083+
let a: i32x2 = i32x2::new(0, 1);
10084+
let b: i64x2 = i64x2::new(32, 36);
10085+
let e: i32x4 = i32x4::new(0, 1, 8, 9);
10086+
let r: i32x4 = transmute(vrshrn_high_n_s64::<2>(transmute(a), transmute(b)));
10087+
assert_eq!(r, e);
10088+
}
10089+
10090+
// With N = 2, expects the lanes of `a` followed by the lanes of `b`
// rounding-shifted right by 2 (32..=60 step 4 becomes 8..=15).
#[simd_test(enable = "neon")]
10091+
unsafe fn test_vrshrn_high_n_u16() {
10092+
let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
10093+
let b: u16x8 = u16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
10094+
let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
10095+
let r: u8x16 = transmute(vrshrn_high_n_u16::<2>(transmute(a), transmute(b)));
10096+
assert_eq!(r, e);
10097+
}
10098+
10099+
// With N = 2, expects the lanes of `a` followed by the lanes of `b`
// rounding-shifted right by 2 (32, 36, 40, 44 becomes 8, 9, 10, 11).
#[simd_test(enable = "neon")]
10100+
unsafe fn test_vrshrn_high_n_u32() {
10101+
let a: u16x4 = u16x4::new(0, 1, 8, 9);
10102+
let b: u32x4 = u32x4::new(32, 36, 40, 44);
10103+
let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
10104+
let r: u16x8 = transmute(vrshrn_high_n_u32::<2>(transmute(a), transmute(b)));
10105+
assert_eq!(r, e);
10106+
}
10107+
10108+
// With N = 2, expects the lanes of `a` followed by the lanes of `b`
// rounding-shifted right by 2 (32, 36 becomes 8, 9).
#[simd_test(enable = "neon")]
10109+
unsafe fn test_vrshrn_high_n_u64() {
10110+
let a: u32x2 = u32x2::new(0, 1);
10111+
let b: u64x2 = u64x2::new(32, 36);
10112+
let e: u32x4 = u32x4::new(0, 1, 8, 9);
10113+
let r: u32x4 = transmute(vrshrn_high_n_u64::<2>(transmute(a), transmute(b)));
10114+
assert_eq!(r, e);
10115+
}
10116+
10117+
// Expects `vrsrad_n_s64::<2>(1, 4)` to return 2, i.e. 1 + (4 shifted right by 2).
#[simd_test(enable = "neon")]
10118+
unsafe fn test_vrsrad_n_s64() {
10119+
let a: i64 = 1;
10120+
let b: i64 = 4;
10121+
let e: i64 = 2;
10122+
let r: i64 = transmute(vrsrad_n_s64::<2>(transmute(a), transmute(b)));
10123+
assert_eq!(r, e);
10124+
}
10125+
10126+
// Expects `vrsrad_n_u64::<2>(1, 4)` to return 2, i.e. 1 + (4 shifted right by 2).
#[simd_test(enable = "neon")]
10127+
unsafe fn test_vrsrad_n_u64() {
10128+
let a: u64 = 1;
10129+
let b: u64 = 4;
10130+
let e: u64 = 2;
10131+
let r: u64 = transmute(vrsrad_n_u64::<2>(transmute(a), transmute(b)));
10132+
assert_eq!(r, e);
10133+
}
10134+
10135+
// Expects `vshld_s64(1, 2)` to return 4.
#[simd_test(enable = "neon")]
10136+
unsafe fn test_vshld_s64() {
10137+
let a: i64 = 1;
10138+
let b: i64 = 2;
10139+
let e: i64 = 4;
10140+
let r: i64 = transmute(vshld_s64(transmute(a), transmute(b)));
10141+
assert_eq!(r, e);
10142+
}
10143+
10144+
// Expects `vshld_u64(1, 2)` to return 4.
#[simd_test(enable = "neon")]
10145+
unsafe fn test_vshld_u64() {
10146+
let a: u64 = 1;
10147+
let b: i64 = 2;
10148+
let e: u64 = 4;
10149+
let r: u64 = transmute(vshld_u64(transmute(a), transmute(b)));
10150+
assert_eq!(r, e);
10151+
}
10152+
987510153
#[simd_test(enable = "neon")]
987610154
unsafe fn test_vshll_high_n_s8() {
987710155
let a: i8x16 = i8x16::new(0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8);

0 commit comments

Comments
 (0)