4
4
//!
5
5
//! Intrinsics that could live here:
6
6
//!
7
- //! - [ ] __smulbb
8
- //! - [ ] __smulbt
9
- //! - [ ] __smultb
10
- //! - [ ] __smultt
11
- //! - [ ] __smulwb
12
- //! - [ ] __smulwt
7
+ //! - [x] __smulbb
8
+ //! - [x] __smulbt
9
+ //! - [x] __smultb
10
+ //! - [x] __smultt
11
+ //! - [x] __smulwb
12
+ //! - [x] __smulwt
13
13
//! - [x] __qadd
14
14
//! - [x] __qsub
15
- //! - [ ] __qdbl
16
- //! - [ ] __smlabb
17
- //! - [ ] __smlabt
18
- //! - [ ] __smlatb
19
- //! - [ ] __smlatt
20
- //! - [ ] __smlawb
21
- //! - [ ] __smlawt
15
+ //! - [x] __qdbl
16
+ //! - [x] __smlabb
17
+ //! - [x] __smlabt
18
+ //! - [x] __smlatb
19
+ //! - [x] __smlatt
20
+ //! - [x] __smlawb
21
+ //! - [x] __smlawt
22
22
23
23
#[ cfg( test) ]
24
24
use stdsimd_test:: assert_instr;
25
25
26
+ use crate :: mem:: transmute;
27
+ use core_arch:: acle:: simd32:: int16x2_t;
28
+
26
29
extern "C" {
30
+ #[ link_name = "llvm.arm.smulbb" ]
31
+ fn arm_smulbb ( a : i32 , b : i32 ) -> i32 ;
32
+
33
+ #[ link_name = "llvm.arm.smulbt" ]
34
+ fn arm_smulbt ( a : i32 , b : i32 ) -> i32 ;
35
+
36
+ #[ link_name = "llvm.arm.smultb" ]
37
+ fn arm_smultb ( a : i32 , b : i32 ) -> i32 ;
38
+
39
+ #[ link_name = "llvm.arm.smultt" ]
40
+ fn arm_smultt ( a : i32 , b : i32 ) -> i32 ;
41
+
42
+ #[ link_name = "llvm.arm.smulwb" ]
43
+ fn arm_smulwb ( a : i32 , b : i32 ) -> i32 ;
44
+
45
+ #[ link_name = "llvm.arm.smulwt" ]
46
+ fn arm_smulwt ( a : i32 , b : i32 ) -> i32 ;
47
+
27
48
#[ link_name = "llvm.arm.qadd" ]
28
49
fn arm_qadd ( a : i32 , b : i32 ) -> i32 ;
29
50
30
51
#[ link_name = "llvm.arm.qsub" ]
31
52
fn arm_qsub ( a : i32 , b : i32 ) -> i32 ;
32
53
54
+ #[ link_name = "llvm.arm.smlabb" ]
55
+ fn arm_smlabb ( a : i32 , b : i32 , c : i32 ) -> i32 ;
56
+
57
+ #[ link_name = "llvm.arm.smlabt" ]
58
+ fn arm_smlabt ( a : i32 , b : i32 , c : i32 ) -> i32 ;
59
+
60
+ #[ link_name = "llvm.arm.smlatb" ]
61
+ fn arm_smlatb ( a : i32 , b : i32 , c : i32 ) -> i32 ;
62
+
63
+ #[ link_name = "llvm.arm.smlatt" ]
64
+ fn arm_smlatt ( a : i32 , b : i32 , c : i32 ) -> i32 ;
65
+
66
+ #[ link_name = "llvm.arm.smlawb" ]
67
+ fn arm_smlawb ( a : i32 , b : i32 , c : i32 ) -> i32 ;
68
+
69
+ #[ link_name = "llvm.arm.smlawt" ]
70
+ fn arm_smlawt ( a : i32 , b : i32 , c : i32 ) -> i32 ;
71
+ }
72
+
73
+ /// Insert a SMULBB instruction
74
+ ///
75
+ /// Returns the equivalent of a\[0\] * b\[0\]
76
+ /// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
77
+ #[ inline]
78
+ #[ cfg_attr( test, assert_instr( smulbb) ) ]
79
+ pub unsafe fn __smulbb ( a : int16x2_t , b : int16x2_t ) -> i32 {
80
+ arm_smulbb ( transmute ( a) , transmute ( b) )
81
+ }
82
+
83
+ /// Insert a SMULTB instruction
84
+ ///
85
+ /// Returns the equivalent of a\[0\] * b\[1\]
86
+ /// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
87
+ #[ inline]
88
+ #[ cfg_attr( test, assert_instr( smultb) ) ]
89
+ pub unsafe fn __smultb ( a : int16x2_t , b : int16x2_t ) -> i32 {
90
+ arm_smultb ( transmute ( a) , transmute ( b) )
91
+ }
92
+
93
+ /// Insert a SMULTB instruction
94
+ ///
95
+ /// Returns the equivalent of a\[1\] * b\[0\]
96
+ /// where [0] is the lower 16 bits and [1] is the upper 16 bits.
97
+ #[ inline]
98
+ #[ cfg_attr( test, assert_instr( smulbt) ) ]
99
+ pub unsafe fn __smulbt ( a : int16x2_t , b : int16x2_t ) -> i32 {
100
+ arm_smulbt ( transmute ( a) , transmute ( b) )
101
+ }
102
+
103
+ /// Insert a SMULTT instruction
104
+ ///
105
+ /// Returns the equivalent of a\[1\] * b\[1\]
106
+ /// where [0] is the lower 16 bits and [1] is the upper 16 bits.
107
+ #[ inline]
108
+ #[ cfg_attr( test, assert_instr( smultt) ) ]
109
+ pub unsafe fn __smultt ( a : int16x2_t , b : int16x2_t ) -> i32 {
110
+ arm_smultt ( transmute ( a) , transmute ( b) )
111
+ }
112
+
113
+ /// Insert a SMULWB instruction
114
+ ///
115
+ /// Multiplies the 32-bit signed first operand with the low halfword
116
+ /// (as a 16-bit signed integer) of the second operand.
117
+ /// Return the top 32 bits of the 48-bit product
118
+ #[ inline]
119
+ #[ cfg_attr( test, assert_instr( smulwb) ) ]
120
+ pub unsafe fn __smulwb ( a : int16x2_t , b : i32 ) -> i32 {
121
+ arm_smulwb ( transmute ( a) , b)
122
+ }
123
+
124
+ /// Insert a SMULWT instruction
125
+ ///
126
+ /// Multiplies the 32-bit signed first operand with the high halfword
127
+ /// (as a 16-bit signed integer) of the second operand.
128
+ /// Return the top 32 bits of the 48-bit product
129
+ #[ inline]
130
+ #[ cfg_attr( test, assert_instr( smulwt) ) ]
131
+ pub unsafe fn __smulwt ( a : int16x2_t , b : i32 ) -> i32 {
132
+ arm_smulwt ( transmute ( a) , b)
33
133
}
34
134
35
135
/// Signed saturating addition
36
136
///
37
137
/// Returns the 32-bit saturating signed equivalent of a + b.
138
+ /// Sets the Q flag if saturation occurs.
38
139
#[ inline]
39
140
#[ cfg_attr( test, assert_instr( qadd) ) ]
40
141
pub unsafe fn __qadd ( a : i32 , b : i32 ) -> i32 {
@@ -44,18 +145,150 @@ pub unsafe fn __qadd(a: i32, b: i32) -> i32 {
44
145
/// Signed saturating subtraction
45
146
///
46
147
/// Returns the 32-bit saturating signed equivalent of a - b.
148
+ /// Sets the Q flag if saturation occurs.
47
149
#[ inline]
48
150
#[ cfg_attr( test, assert_instr( qsub) ) ]
49
151
pub unsafe fn __qsub ( a : i32 , b : i32 ) -> i32 {
50
152
arm_qsub ( a, b)
51
153
}
52
154
155
+ /// Insert a QADD instruction
156
+ ///
157
+ /// Returns the 32-bit saturating signed equivalent of a + a
158
+ /// Sets the Q flag if saturation occurs.
159
+ #[ inline]
160
+ #[ cfg_attr( test, assert_instr( qadd) ) ]
161
+ pub unsafe fn __qdbl ( a : i32 ) -> i32 {
162
+ arm_qadd ( a, a)
163
+ }
164
+
165
+ /// Insert a SMLABB instruction
166
+ ///
167
+ /// Returns the equivalent of a\[0\] * b\[0\] + c
168
+ /// where [0] is the lower 16 bits and [1] is the upper 16 bits.
169
+ /// Sets the Q flag if overflow occurs on the addition.
170
+ #[ inline]
171
+ #[ cfg_attr( test, assert_instr( smlabb) ) ]
172
+ pub unsafe fn __smlabb ( a : int16x2_t , b : int16x2_t , c : i32 ) -> i32 {
173
+ arm_smlabb ( transmute ( a) , transmute ( b) , c)
174
+ }
175
+
176
+ /// Insert a SMLABT instruction
177
+ ///
178
+ /// Returns the equivalent of a\[0\] * b\[1\] + c
179
+ /// where [0] is the lower 16 bits and [1] is the upper 16 bits.
180
+ /// Sets the Q flag if overflow occurs on the addition.
181
+ #[ inline]
182
+ #[ cfg_attr( test, assert_instr( smlabt) ) ]
183
+ pub unsafe fn __smlabt ( a : int16x2_t , b : int16x2_t , c : i32 ) -> i32 {
184
+ arm_smlabt ( transmute ( a) , transmute ( b) , c)
185
+ }
186
+
187
+ /// Insert a SMLATB instruction
188
+ ///
189
+ /// Returns the equivalent of a\[1\] * b\[0\] + c
190
+ /// where [0] is the lower 16 bits and [1] is the upper 16 bits.
191
+ /// Sets the Q flag if overflow occurs on the addition.
192
+ #[ inline]
193
+ #[ cfg_attr( test, assert_instr( smlatb) ) ]
194
+ pub unsafe fn __smlatb ( a : int16x2_t , b : int16x2_t , c : i32 ) -> i32 {
195
+ arm_smlatb ( transmute ( a) , transmute ( b) , c)
196
+ }
197
+
198
+ /// Insert a SMLATT instruction
199
+ ///
200
+ /// Returns the equivalent of a\[1\] * b\[1\] + c
201
+ /// where [0] is the lower 16 bits and [1] is the upper 16 bits.
202
+ /// Sets the Q flag if overflow occurs on the addition.
203
+ #[ inline]
204
+ #[ cfg_attr( test, assert_instr( smlatt) ) ]
205
+ pub unsafe fn __smlatt ( a : int16x2_t , b : int16x2_t , c : i32 ) -> i32 {
206
+ arm_smlatt ( transmute ( a) , transmute ( b) , c)
207
+ }
208
+
209
+ /// Insert a SMLAWB instruction
210
+ ///
211
+ /// Returns the equivalent of (a * b[0] + (c << 16)) >> 16
212
+ /// where [0] is the lower 16 bits and [1] is the upper 16 bits.
213
+ /// Sets the Q flag if overflow occurs on the addition.
214
+ #[ inline]
215
+ #[ cfg_attr( test, assert_instr( smlawb) ) ]
216
+ pub unsafe fn __smlawb ( a : i32 , b : int16x2_t , c : i32 ) -> i32 {
217
+ arm_smlawb ( a, transmute ( b) , c)
218
+ }
219
+
220
+ /// Insert a SMLAWT instruction
221
+ ///
222
+ /// Returns the equivalent of (a * b[1] + (c << 16)) >> 16
223
+ /// where [0] is the lower 16 bits and [1] is the upper 16 bits.
224
+ /// Sets the Q flag if overflow occurs on the addition.
225
+ #[ inline]
226
+ #[ cfg_attr( test, assert_instr( smlawt) ) ]
227
+ pub unsafe fn __smlawt ( a : i32 , b : int16x2_t , c : i32 ) -> i32 {
228
+ arm_smlawt ( a, transmute ( b) , c)
229
+ }
230
+
53
231
#[ cfg( test) ]
54
232
mod tests {
55
233
use crate :: core_arch:: arm:: * ;
56
- use std:: mem;
234
+ use crate :: core_arch:: simd:: { i16x2, i8x4, u8x4} ;
235
+ use std:: mem:: transmute;
57
236
use stdsimd_test:: simd_test;
58
237
238
// `__smulbb` multiplies the low halfwords of both operands.
#[test]
fn smulbb() {
    let a = i16x2::new(10, 20);
    let b = i16x2::new(30, 40);
    let product = unsafe { super::__smulbb(transmute(a), transmute(b)) };
    assert_eq!(product, 10 * 30);
}
246
+
247
// `__smulbt` multiplies the low halfword of `a` by the high halfword of `b`.
#[test]
fn smulbt() {
    let a = i16x2::new(10, 20);
    let b = i16x2::new(30, 40);
    let product = unsafe { super::__smulbt(transmute(a), transmute(b)) };
    assert_eq!(product, 10 * 40);
}
255
+
256
// `__smultb` multiplies the high halfword of `a` by the low halfword of `b`.
#[test]
fn smultb() {
    let a = i16x2::new(10, 20);
    let b = i16x2::new(30, 40);
    let product = unsafe { super::__smultb(transmute(a), transmute(b)) };
    assert_eq!(product, 20 * 30);
}
264
+
265
// `__smultt` multiplies the high halfwords of both operands.
#[test]
fn smultt() {
    let a = i16x2::new(10, 20);
    let b = i16x2::new(30, 40);
    let product = unsafe { super::__smultt(transmute(a), transmute(b)) };
    assert_eq!(product, 20 * 40);
}
273
+
274
// Checks `__smulwb` against the expected value `20 * b`.
#[test]
fn smulwb() {
    let a = i16x2::new(10, 20);
    let b = 30;
    let product = unsafe { super::__smulwb(transmute(a), b) };
    assert_eq!(product, 20 * b);
}
282
+
283
// Checks `__smulwt` against `(10 * b) >> 16`, which evaluates to 0 for the
// value of `b` used here.
#[test]
fn smulwt() {
    let a = i16x2::new(10, 20);
    let b = 30;
    let product = unsafe { super::__smulwt(transmute(a), b) };
    assert_eq!(product, (10 * b) >> 16);
}
291
+
59
292
#[ test]
60
293
fn qadd ( ) {
61
294
unsafe {
@@ -73,4 +306,72 @@ mod tests {
73
306
assert_eq ! ( super :: __qsub( :: std:: i32 :: MIN , 10 ) , :: std:: i32 :: MIN ) ;
74
307
}
75
308
}
309
+
310
// `__qdbl` doubles its argument and saturates instead of wrapping.
// `#[test]` is required for the harness to run this function at all.
#[test]
fn qdbl() {
    unsafe {
        assert_eq!(super::__qdbl(10), 20);
        assert_eq!(super::__qdbl(::std::i32::MAX), ::std::i32::MAX);
        // Saturation must also hold at the negative end of the range.
        assert_eq!(super::__qdbl(::std::i32::MIN), ::std::i32::MIN);
    }
}
316
+
317
// `__smlabb` computes low(a) * low(b) + c.
// `#[test]` is required for the harness to run this function at all.
#[test]
fn smlabb() {
    unsafe {
        let a = i16x2::new(10, 20);
        let b = i16x2::new(30, 40);
        let c = 50;
        let r = (10 * 30) + c;
        assert_eq!(super::__smlabb(transmute(a), transmute(b), c), r);
    }
}
326
+
327
// `__smlabt` computes low(a) * high(b) + c.
// `#[test]` is required for the harness to run this function at all.
#[test]
fn smlabt() {
    unsafe {
        let a = i16x2::new(10, 20);
        let b = i16x2::new(30, 40);
        let c = 50;
        let r = (10 * 40) + c;
        assert_eq!(super::__smlabt(transmute(a), transmute(b), c), r);
    }
}
336
+
337
// `__smlatb` computes high(a) * low(b) + c.
// `#[test]` is required for the harness to run this function at all.
#[test]
fn smlatb() {
    unsafe {
        let a = i16x2::new(10, 20);
        let b = i16x2::new(30, 40);
        let c = 50;
        let r = (20 * 30) + c;
        // Exercise `__smlatb` itself; the expected value above is the
        // top(a) * bottom(b) product, which `__smlabt` would not produce.
        assert_eq!(super::__smlatb(transmute(a), transmute(b), c), r);
    }
}
346
+
347
// `__smlatt` computes high(a) * high(b) + c.
// `#[test]` is required for the harness to run this function at all.
#[test]
fn smlatt() {
    unsafe {
        let a = i16x2::new(10, 20);
        let b = i16x2::new(30, 40);
        let c = 50;
        let r = (20 * 40) + c;
        assert_eq!(super::__smlatt(transmute(a), transmute(b), c), r);
    }
}
356
+
357
// `__smlawb` is checked against (a * low(b) + (c << 16)) >> 16.
// `#[test]` is required for the harness to run this function at all.
#[test]
fn smlawb() {
    unsafe {
        let a: i32 = 10;
        let b = i16x2::new(30, 40);
        let c: i32 = 50;
        let r: i32 = ((a * 30) + (c << 16)) >> 16;
        assert_eq!(super::__smlawb(a, transmute(b), c), r);
    }
}
366
+
367
// `__smlawt` is checked against (a * high(b) + (c << 16)) >> 16.
// `#[test]` is required for the harness to run this function at all.
#[test]
fn smlawt() {
    unsafe {
        let a: i32 = 10;
        let b = i16x2::new(30, 40);
        let c: i32 = 50;
        let r: i32 = ((a * 40) + (c << 16)) >> 16;
        assert_eq!(super::__smlawt(a, transmute(b), c), r);
    }
}
376
+
76
377
}
0 commit comments