Skip to content

Commit 7454388

Browse files
paolotetignzlbg
authored andcommitted
ACLE/DSP: implement remaining intrinsics
Adds: __smulbb, __smulbt, __smultb, __smultt, __smulwb, __smulwt, __qdbl, __smlabb, __smlabt, __smlatb, __smlatt, __smlawb, __smlawt and related test-cases
1 parent 7040a75 commit 7454388

File tree

1 file changed

+315
-14
lines changed
  • crates/core_arch/src/acle

1 file changed

+315
-14
lines changed

crates/core_arch/src/acle/dsp.rs

Lines changed: 315 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -4,37 +4,138 @@
44
//!
55
//! Intrinsics that could live here:
66
//!
7-
//! - [ ] __smulbb
8-
//! - [ ] __smulbt
9-
//! - [ ] __smultb
10-
//! - [ ] __smultt
11-
//! - [ ] __smulwb
12-
//! - [ ] __smulwt
7+
//! - [x] __smulbb
8+
//! - [x] __smulbt
9+
//! - [x] __smultb
10+
//! - [x] __smultt
11+
//! - [x] __smulwb
12+
//! - [x] __smulwt
1313
//! - [x] __qadd
1414
//! - [x] __qsub
15-
//! - [ ] __qdbl
16-
//! - [ ] __smlabb
17-
//! - [ ] __smlabt
18-
//! - [ ] __smlatb
19-
//! - [ ] __smlatt
20-
//! - [ ] __smlawb
21-
//! - [ ] __smlawt
15+
//! - [x] __qdbl
16+
//! - [x] __smlabb
17+
//! - [x] __smlabt
18+
//! - [x] __smlatb
19+
//! - [x] __smlatt
20+
//! - [x] __smlawb
21+
//! - [x] __smlawt
2222
2323
#[cfg(test)]
2424
use stdsimd_test::assert_instr;
2525

26+
use crate::mem::transmute;
27+
use core_arch::acle::simd32::int16x2_t;
28+
2629
extern "C" {
30+
#[link_name = "llvm.arm.smulbb"]
31+
fn arm_smulbb(a: i32, b: i32) -> i32;
32+
33+
#[link_name = "llvm.arm.smulbt"]
34+
fn arm_smulbt(a: i32, b: i32) -> i32;
35+
36+
#[link_name = "llvm.arm.smultb"]
37+
fn arm_smultb(a: i32, b: i32) -> i32;
38+
39+
#[link_name = "llvm.arm.smultt"]
40+
fn arm_smultt(a: i32, b: i32) -> i32;
41+
42+
#[link_name = "llvm.arm.smulwb"]
43+
fn arm_smulwb(a: i32, b: i32) -> i32;
44+
45+
#[link_name = "llvm.arm.smulwt"]
46+
fn arm_smulwt(a: i32, b: i32) -> i32;
47+
2748
#[link_name = "llvm.arm.qadd"]
2849
fn arm_qadd(a: i32, b: i32) -> i32;
2950

3051
#[link_name = "llvm.arm.qsub"]
3152
fn arm_qsub(a: i32, b: i32) -> i32;
3253

54+
#[link_name = "llvm.arm.smlabb"]
55+
fn arm_smlabb(a: i32, b: i32, c: i32) -> i32;
56+
57+
#[link_name = "llvm.arm.smlabt"]
58+
fn arm_smlabt(a: i32, b: i32, c: i32) -> i32;
59+
60+
#[link_name = "llvm.arm.smlatb"]
61+
fn arm_smlatb(a: i32, b: i32, c: i32) -> i32;
62+
63+
#[link_name = "llvm.arm.smlatt"]
64+
fn arm_smlatt(a: i32, b: i32, c: i32) -> i32;
65+
66+
#[link_name = "llvm.arm.smlawb"]
67+
fn arm_smlawb(a: i32, b: i32, c: i32) -> i32;
68+
69+
#[link_name = "llvm.arm.smlawt"]
70+
fn arm_smlawt(a: i32, b: i32, c: i32) -> i32;
71+
}
72+
73+
/// Insert a SMULBB instruction
74+
///
75+
/// Returns the equivalent of a\[0\] * b\[0\]
76+
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
77+
#[inline]
78+
#[cfg_attr(test, assert_instr(smulbb))]
79+
pub unsafe fn __smulbb(a: int16x2_t, b: int16x2_t) -> i32 {
80+
arm_smulbb(transmute(a), transmute(b))
81+
}
82+
83+
/// Insert a SMULTB instruction
84+
///
85+
/// Returns the equivalent of a\[0\] * b\[1\]
86+
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
87+
#[inline]
88+
#[cfg_attr(test, assert_instr(smultb))]
89+
pub unsafe fn __smultb(a: int16x2_t, b: int16x2_t) -> i32 {
90+
arm_smultb(transmute(a), transmute(b))
91+
}
92+
93+
/// Insert a SMULTB instruction
94+
///
95+
/// Returns the equivalent of a\[1\] * b\[0\]
96+
/// where [0] is the lower 16 bits and [1] is the upper 16 bits.
97+
#[inline]
98+
#[cfg_attr(test, assert_instr(smulbt))]
99+
pub unsafe fn __smulbt(a: int16x2_t, b: int16x2_t) -> i32 {
100+
arm_smulbt(transmute(a), transmute(b))
101+
}
102+
103+
/// Insert a SMULTT instruction
104+
///
105+
/// Returns the equivalent of a\[1\] * b\[1\]
106+
/// where [0] is the lower 16 bits and [1] is the upper 16 bits.
107+
#[inline]
108+
#[cfg_attr(test, assert_instr(smultt))]
109+
pub unsafe fn __smultt(a: int16x2_t, b: int16x2_t) -> i32 {
110+
arm_smultt(transmute(a), transmute(b))
111+
}
112+
113+
/// Insert a SMULWB instruction
114+
///
115+
/// Multiplies the 32-bit signed first operand with the low halfword
116+
/// (as a 16-bit signed integer) of the second operand.
117+
/// Return the top 32 bits of the 48-bit product
118+
#[inline]
119+
#[cfg_attr(test, assert_instr(smulwb))]
120+
pub unsafe fn __smulwb(a: int16x2_t, b: i32) -> i32 {
121+
arm_smulwb(transmute(a), b)
122+
}
123+
124+
/// Insert a SMULWT instruction
125+
///
126+
/// Multiplies the 32-bit signed first operand with the high halfword
127+
/// (as a 16-bit signed integer) of the second operand.
128+
/// Return the top 32 bits of the 48-bit product
129+
#[inline]
130+
#[cfg_attr(test, assert_instr(smulwt))]
131+
pub unsafe fn __smulwt(a: int16x2_t, b: i32) -> i32 {
132+
arm_smulwt(transmute(a), b)
33133
}
34134

35135
/// Signed saturating addition
36136
///
37137
/// Returns the 32-bit saturating signed equivalent of a + b.
138+
/// Sets the Q flag if saturation occurs.
38139
#[inline]
39140
#[cfg_attr(test, assert_instr(qadd))]
40141
pub unsafe fn __qadd(a: i32, b: i32) -> i32 {
@@ -44,18 +145,150 @@ pub unsafe fn __qadd(a: i32, b: i32) -> i32 {
44145
/// Signed saturating subtraction
45146
///
46147
/// Returns the 32-bit saturating signed equivalent of a - b.
148+
/// Sets the Q flag if saturation occurs.
47149
#[inline]
48150
#[cfg_attr(test, assert_instr(qsub))]
49151
pub unsafe fn __qsub(a: i32, b: i32) -> i32 {
50152
arm_qsub(a, b)
51153
}
52154

155+
/// Insert a QADD instruction
156+
///
157+
/// Returns the 32-bit saturating signed equivalent of a + a
158+
/// Sets the Q flag if saturation occurs.
159+
#[inline]
160+
#[cfg_attr(test, assert_instr(qadd))]
161+
pub unsafe fn __qdbl(a: i32) -> i32 {
162+
arm_qadd(a, a)
163+
}
164+
165+
/// Insert a SMLABB instruction
166+
///
167+
/// Returns the equivalent of a\[0\] * b\[0\] + c
168+
/// where [0] is the lower 16 bits and [1] is the upper 16 bits.
169+
/// Sets the Q flag if overflow occurs on the addition.
170+
#[inline]
171+
#[cfg_attr(test, assert_instr(smlabb))]
172+
pub unsafe fn __smlabb(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
173+
arm_smlabb(transmute(a), transmute(b), c)
174+
}
175+
176+
/// Insert a SMLABT instruction
177+
///
178+
/// Returns the equivalent of a\[0\] * b\[1\] + c
179+
/// where [0] is the lower 16 bits and [1] is the upper 16 bits.
180+
/// Sets the Q flag if overflow occurs on the addition.
181+
#[inline]
182+
#[cfg_attr(test, assert_instr(smlabt))]
183+
pub unsafe fn __smlabt(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
184+
arm_smlabt(transmute(a), transmute(b), c)
185+
}
186+
187+
/// Insert a SMLATB instruction
188+
///
189+
/// Returns the equivalent of a\[1\] * b\[0\] + c
190+
/// where [0] is the lower 16 bits and [1] is the upper 16 bits.
191+
/// Sets the Q flag if overflow occurs on the addition.
192+
#[inline]
193+
#[cfg_attr(test, assert_instr(smlatb))]
194+
pub unsafe fn __smlatb(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
195+
arm_smlatb(transmute(a), transmute(b), c)
196+
}
197+
198+
/// Insert a SMLATT instruction
199+
///
200+
/// Returns the equivalent of a\[1\] * b\[1\] + c
201+
/// where [0] is the lower 16 bits and [1] is the upper 16 bits.
202+
/// Sets the Q flag if overflow occurs on the addition.
203+
#[inline]
204+
#[cfg_attr(test, assert_instr(smlatt))]
205+
pub unsafe fn __smlatt(a: int16x2_t, b: int16x2_t, c: i32) -> i32 {
206+
arm_smlatt(transmute(a), transmute(b), c)
207+
}
208+
209+
/// Insert a SMLAWB instruction
210+
///
211+
/// Returns the equivalent of (a * b[0] + (c << 16)) >> 16
212+
/// where [0] is the lower 16 bits and [1] is the upper 16 bits.
213+
/// Sets the Q flag if overflow occurs on the addition.
214+
#[inline]
215+
#[cfg_attr(test, assert_instr(smlawb))]
216+
pub unsafe fn __smlawb(a: i32, b: int16x2_t, c: i32) -> i32 {
217+
arm_smlawb(a, transmute(b), c)
218+
}
219+
220+
/// Insert a SMLAWT instruction
221+
///
222+
/// Returns the equivalent of (a * b[1] + (c << 16)) >> 16
223+
/// where [0] is the lower 16 bits and [1] is the upper 16 bits.
224+
/// Sets the Q flag if overflow occurs on the addition.
225+
#[inline]
226+
#[cfg_attr(test, assert_instr(smlawt))]
227+
pub unsafe fn __smlawt(a: i32, b: int16x2_t, c: i32) -> i32 {
228+
arm_smlawt(a, transmute(b), c)
229+
}
230+
53231
#[cfg(test)]
54232
mod tests {
55233
use crate::core_arch::arm::*;
56-
use std::mem;
234+
use crate::core_arch::simd::{i16x2, i8x4, u8x4};
235+
use std::mem::transmute;
57236
use stdsimd_test::simd_test;
58237

238+
#[test]
239+
fn smulbb() {
240+
unsafe {
241+
let a = i16x2::new(10, 20);
242+
let b = i16x2::new(30, 40);
243+
assert_eq!(super::__smulbb(transmute(a), transmute(b)), 10 * 30);
244+
}
245+
}
246+
247+
#[test]
248+
fn smulbt() {
249+
unsafe {
250+
let a = i16x2::new(10, 20);
251+
let b = i16x2::new(30, 40);
252+
assert_eq!(super::__smulbt(transmute(a), transmute(b)), 10 * 40);
253+
}
254+
}
255+
256+
#[test]
257+
fn smultb() {
258+
unsafe {
259+
let a = i16x2::new(10, 20);
260+
let b = i16x2::new(30, 40);
261+
assert_eq!(super::__smultb(transmute(a), transmute(b)), 20 * 30);
262+
}
263+
}
264+
265+
#[test]
266+
fn smultt() {
267+
unsafe {
268+
let a = i16x2::new(10, 20);
269+
let b = i16x2::new(30, 40);
270+
assert_eq!(super::__smultt(transmute(a), transmute(b)), 20 * 40);
271+
}
272+
}
273+
274+
#[test]
275+
fn smulwb() {
276+
unsafe {
277+
let a = i16x2::new(10, 20);
278+
let b = 30;
279+
assert_eq!(super::__smulwb(transmute(a), b), 20 * b);
280+
}
281+
}
282+
283+
#[test]
284+
fn smulwt() {
285+
unsafe {
286+
let a = i16x2::new(10, 20);
287+
let b = 30;
288+
assert_eq!(super::__smulwt(transmute(a), b), (10 * b) >> 16);
289+
}
290+
}
291+
59292
#[test]
60293
fn qadd() {
61294
unsafe {
@@ -73,4 +306,72 @@ mod tests {
73306
assert_eq!(super::__qsub(::std::i32::MIN, 10), ::std::i32::MIN);
74307
}
75308
}
309+
310+
/// `__qdbl` doubles its argument and saturates at `i32::MAX`.
#[test]
fn qdbl() {
    unsafe {
        assert_eq!(super::__qdbl(10), 20);
        // Doubling the maximum value must saturate rather than wrap.
        assert_eq!(super::__qdbl(::std::i32::MAX), ::std::i32::MAX);
    }
}
316+
317+
/// `__smlabb` computes `a[0] * b[0] + c`.
#[test]
fn smlabb() {
    unsafe {
        let a = i16x2::new(10, 20);
        let b = i16x2::new(30, 40);
        let c = 50;
        let r = (10 * 30) + c;
        assert_eq!(super::__smlabb(transmute(a), transmute(b), c), r);
    }
}
326+
327+
/// `__smlabt` computes `a[0] * b[1] + c`.
#[test]
fn smlabt() {
    unsafe {
        let a = i16x2::new(10, 20);
        let b = i16x2::new(30, 40);
        let c = 50;
        let r = (10 * 40) + c;
        assert_eq!(super::__smlabt(transmute(a), transmute(b), c), r);
    }
}
336+
337+
/// `__smlatb` computes `a[1] * b[0] + c`.
#[test]
fn smlatb() {
    unsafe {
        let a = i16x2::new(10, 20);
        let b = i16x2::new(30, 40);
        let c = 50;
        let r = (20 * 30) + c;
        // Exercise `__smlatb` itself: the expected value is top(a) * bottom(b) + c,
        // which `__smlabt` (bottom(a) * top(b) + c) would not produce.
        assert_eq!(super::__smlatb(transmute(a), transmute(b), c), r);
    }
}
346+
347+
/// `__smlatt` computes `a[1] * b[1] + c`.
#[test]
fn smlatt() {
    unsafe {
        let a = i16x2::new(10, 20);
        let b = i16x2::new(30, 40);
        let c = 50;
        let r = (20 * 40) + c;
        assert_eq!(super::__smlatt(transmute(a), transmute(b), c), r);
    }
}
356+
357+
/// `__smlawb` computes `(a * b[0] + (c << 16)) >> 16`.
#[test]
fn smlawb() {
    unsafe {
        let a: i32 = 10;
        let b = i16x2::new(30, 40);
        let c: i32 = 50;
        let r: i32 = ((a * 30) + (c << 16)) >> 16;
        assert_eq!(super::__smlawb(a, transmute(b), c), r);
    }
}
366+
367+
/// `__smlawt` computes `(a * b[1] + (c << 16)) >> 16`.
#[test]
fn smlawt() {
    unsafe {
        let a: i32 = 10;
        let b = i16x2::new(30, 40);
        let c: i32 = 50;
        let r: i32 = ((a * 40) + (c << 16)) >> 16;
        assert_eq!(super::__smlawt(a, transmute(b), c), r);
    }
}
376+
76377
}

0 commit comments

Comments
 (0)