@@ -79,6 +79,40 @@ extern "C" {
79
79
#[ link_name = "llvm.aarch64.neon.abs.v2i64" ]
80
80
fn vabsq_s64_ ( a : int64x2_t ) -> int64x2_t ;
81
81
82
// Bindings to the LLVM `llvm.aarch64.neon.suqadd.*` intrinsics. Each one takes a signed
// vector `a` and an unsigned vector `b` with the same lane layout and returns a signed
// vector; the `link_name` suffix (v8i8, v16i8, ...) matches the lane layout of the types.
+ #[ link_name = "llvm.aarch64.neon.suqadd.v8i8" ]
83
+ fn vuqadd_s8_ ( a : int8x8_t , b : uint8x8_t ) -> int8x8_t ;
84
+ #[ link_name = "llvm.aarch64.neon.suqadd.v16i8" ]
85
+ fn vuqaddq_s8_ ( a : int8x16_t , b : uint8x16_t ) -> int8x16_t ;
86
+ #[ link_name = "llvm.aarch64.neon.suqadd.v4i16" ]
87
+ fn vuqadd_s16_ ( a : int16x4_t , b : uint16x4_t ) -> int16x4_t ;
88
+ #[ link_name = "llvm.aarch64.neon.suqadd.v8i16" ]
89
+ fn vuqaddq_s16_ ( a : int16x8_t , b : uint16x8_t ) -> int16x8_t ;
90
+ #[ link_name = "llvm.aarch64.neon.suqadd.v2i32" ]
91
+ fn vuqadd_s32_ ( a : int32x2_t , b : uint32x2_t ) -> int32x2_t ;
92
+ #[ link_name = "llvm.aarch64.neon.suqadd.v4i32" ]
93
+ fn vuqaddq_s32_ ( a : int32x4_t , b : uint32x4_t ) -> int32x4_t ;
94
+ #[ link_name = "llvm.aarch64.neon.suqadd.v1i64" ]
95
+ fn vuqadd_s64_ ( a : int64x1_t , b : uint64x1_t ) -> int64x1_t ;
96
+ #[ link_name = "llvm.aarch64.neon.suqadd.v2i64" ]
97
+ fn vuqaddq_s64_ ( a : int64x2_t , b : uint64x2_t ) -> int64x2_t ;
98
+
99
// Bindings to the LLVM `llvm.aarch64.neon.usqadd.*` intrinsics. Each one takes an unsigned
// vector `a` and a signed vector `b` with the same lane layout and returns an unsigned
// vector; the `link_name` suffix (v8i8, v16i8, ...) matches the lane layout of the types.
+ #[ link_name = "llvm.aarch64.neon.usqadd.v8i8" ]
100
+ fn vsqadd_u8_ ( a : uint8x8_t , b : int8x8_t ) -> uint8x8_t ;
101
+ #[ link_name = "llvm.aarch64.neon.usqadd.v16i8" ]
102
+ fn vsqaddq_u8_ ( a : uint8x16_t , b : int8x16_t ) -> uint8x16_t ;
103
+ #[ link_name = "llvm.aarch64.neon.usqadd.v4i16" ]
104
+ fn vsqadd_u16_ ( a : uint16x4_t , b : int16x4_t ) -> uint16x4_t ;
105
+ #[ link_name = "llvm.aarch64.neon.usqadd.v8i16" ]
106
+ fn vsqaddq_u16_ ( a : uint16x8_t , b : int16x8_t ) -> uint16x8_t ;
107
+ #[ link_name = "llvm.aarch64.neon.usqadd.v2i32" ]
108
+ fn vsqadd_u32_ ( a : uint32x2_t , b : int32x2_t ) -> uint32x2_t ;
109
+ #[ link_name = "llvm.aarch64.neon.usqadd.v4i32" ]
110
+ fn vsqaddq_u32_ ( a : uint32x4_t , b : int32x4_t ) -> uint32x4_t ;
111
+ #[ link_name = "llvm.aarch64.neon.usqadd.v1i64" ]
112
+ fn vsqadd_u64_ ( a : uint64x1_t , b : int64x1_t ) -> uint64x1_t ;
113
+ #[ link_name = "llvm.aarch64.neon.usqadd.v2i64" ]
114
+ fn vsqaddq_u64_ ( a : uint64x2_t , b : int64x2_t ) -> uint64x2_t ;
115
+
82
116
#[ link_name = "llvm.aarch64.neon.pmull64" ]
83
117
fn vmull_p64_ ( a : i64 , b : i64 ) -> int8x16_t ;
84
118
@@ -348,6 +382,120 @@ pub unsafe fn vabsq_s64(a: int64x2_t) -> int64x2_t {
348
382
vabsq_s64_ ( a)
349
383
}
350
384
385
+ /// Signed saturating Accumulate of Unsigned value. Adds each unsigned lane of `b` to the matching signed lane of `a` (8 x 8-bit lanes), saturating the signed result; thin wrapper over `vuqadd_s8_`.
386
+ #[ inline]
387
+ #[ target_feature( enable = "neon" ) ]
388
+ #[ cfg_attr( test, assert_instr( suqadd) ) ]
389
+ pub unsafe fn vuqadd_s8 ( a : int8x8_t , b : uint8x8_t ) -> int8x8_t {
390
+ vuqadd_s8_ ( a, b)
391
+ }
392
+ /// Signed saturating Accumulate of Unsigned value. Adds each unsigned lane of `b` to the matching signed lane of `a` (16 x 8-bit lanes), saturating the signed result; thin wrapper over `vuqaddq_s8_`.
393
+ #[ inline]
394
+ #[ target_feature( enable = "neon" ) ]
395
+ #[ cfg_attr( test, assert_instr( suqadd) ) ]
396
+ pub unsafe fn vuqaddq_s8 ( a : int8x16_t , b : uint8x16_t ) -> int8x16_t {
397
+ vuqaddq_s8_ ( a, b)
398
+ }
399
+ /// Signed saturating Accumulate of Unsigned value. Adds each unsigned lane of `b` to the matching signed lane of `a` (4 x 16-bit lanes), saturating the signed result; thin wrapper over `vuqadd_s16_`.
400
+ #[ inline]
401
+ #[ target_feature( enable = "neon" ) ]
402
+ #[ cfg_attr( test, assert_instr( suqadd) ) ]
403
+ pub unsafe fn vuqadd_s16 ( a : int16x4_t , b : uint16x4_t ) -> int16x4_t {
404
+ vuqadd_s16_ ( a, b)
405
+ }
406
+ /// Signed saturating Accumulate of Unsigned value. Adds each unsigned lane of `b` to the matching signed lane of `a` (8 x 16-bit lanes), saturating the signed result; thin wrapper over `vuqaddq_s16_`.
407
+ #[ inline]
408
+ #[ target_feature( enable = "neon" ) ]
409
+ #[ cfg_attr( test, assert_instr( suqadd) ) ]
410
+ pub unsafe fn vuqaddq_s16 ( a : int16x8_t , b : uint16x8_t ) -> int16x8_t {
411
+ vuqaddq_s16_ ( a, b)
412
+ }
413
+ /// Signed saturating Accumulate of Unsigned value. Adds each unsigned lane of `b` to the matching signed lane of `a` (2 x 32-bit lanes), saturating the signed result; thin wrapper over `vuqadd_s32_`.
414
+ #[ inline]
415
+ #[ target_feature( enable = "neon" ) ]
416
+ #[ cfg_attr( test, assert_instr( suqadd) ) ]
417
+ pub unsafe fn vuqadd_s32 ( a : int32x2_t , b : uint32x2_t ) -> int32x2_t {
418
+ vuqadd_s32_ ( a, b)
419
+ }
420
+ /// Signed saturating Accumulate of Unsigned value. Adds each unsigned lane of `b` to the matching signed lane of `a` (4 x 32-bit lanes), saturating the signed result; thin wrapper over `vuqaddq_s32_`.
421
+ #[ inline]
422
+ #[ target_feature( enable = "neon" ) ]
423
+ #[ cfg_attr( test, assert_instr( suqadd) ) ]
424
+ pub unsafe fn vuqaddq_s32 ( a : int32x4_t , b : uint32x4_t ) -> int32x4_t {
425
+ vuqaddq_s32_ ( a, b)
426
+ }
427
+ /// Signed saturating Accumulate of Unsigned value. Adds the unsigned lane of `b` to the signed lane of `a` (1 x 64-bit lane), saturating the signed result; thin wrapper over `vuqadd_s64_`.
428
+ #[ inline]
429
+ #[ target_feature( enable = "neon" ) ]
430
+ #[ cfg_attr( test, assert_instr( suqadd) ) ]
431
+ pub unsafe fn vuqadd_s64 ( a : int64x1_t , b : uint64x1_t ) -> int64x1_t {
432
+ vuqadd_s64_ ( a, b)
433
+ }
434
+ /// Signed saturating Accumulate of Unsigned value. Adds each unsigned lane of `b` to the matching signed lane of `a` (2 x 64-bit lanes), saturating the signed result; thin wrapper over `vuqaddq_s64_`.
435
+ #[ inline]
436
+ #[ target_feature( enable = "neon" ) ]
437
+ #[ cfg_attr( test, assert_instr( suqadd) ) ]
438
+ pub unsafe fn vuqaddq_s64 ( a : int64x2_t , b : uint64x2_t ) -> int64x2_t {
439
+ vuqaddq_s64_ ( a, b)
440
+ }
441
+
442
+ /// Unsigned saturating Accumulate of Signed value. Adds each signed lane of `b` to the matching unsigned lane of `a` (8 x 8-bit lanes), saturating the unsigned result at both ends; thin wrapper over `vsqadd_u8_`.
443
+ #[ inline]
444
+ #[ target_feature( enable = "neon" ) ]
445
+ #[ cfg_attr( test, assert_instr( usqadd) ) ]
446
+ pub unsafe fn vsqadd_u8 ( a : uint8x8_t , b : int8x8_t ) -> uint8x8_t {
447
+ vsqadd_u8_ ( a, b)
448
+ }
449
+ /// Unsigned saturating Accumulate of Signed value. Adds each signed lane of `b` to the matching unsigned lane of `a` (16 x 8-bit lanes), saturating the unsigned result at both ends; thin wrapper over `vsqaddq_u8_`.
450
+ #[ inline]
451
+ #[ target_feature( enable = "neon" ) ]
452
+ #[ cfg_attr( test, assert_instr( usqadd) ) ]
453
+ pub unsafe fn vsqaddq_u8 ( a : uint8x16_t , b : int8x16_t ) -> uint8x16_t {
454
+ vsqaddq_u8_ ( a, b)
455
+ }
456
+ /// Unsigned saturating Accumulate of Signed value. Adds each signed lane of `b` to the matching unsigned lane of `a` (4 x 16-bit lanes), saturating the unsigned result at both ends; thin wrapper over `vsqadd_u16_`.
457
+ #[ inline]
458
+ #[ target_feature( enable = "neon" ) ]
459
+ #[ cfg_attr( test, assert_instr( usqadd) ) ]
460
+ pub unsafe fn vsqadd_u16 ( a : uint16x4_t , b : int16x4_t ) -> uint16x4_t {
461
+ vsqadd_u16_ ( a, b)
462
+ }
463
+ /// Unsigned saturating Accumulate of Signed value. Adds each signed lane of `b` to the matching unsigned lane of `a` (8 x 16-bit lanes), saturating the unsigned result at both ends; thin wrapper over `vsqaddq_u16_`.
464
+ #[ inline]
465
+ #[ target_feature( enable = "neon" ) ]
466
+ #[ cfg_attr( test, assert_instr( usqadd) ) ]
467
+ pub unsafe fn vsqaddq_u16 ( a : uint16x8_t , b : int16x8_t ) -> uint16x8_t {
468
+ vsqaddq_u16_ ( a, b)
469
+ }
470
+ /// Unsigned saturating Accumulate of Signed value. Adds each signed lane of `b` to the matching unsigned lane of `a` (2 x 32-bit lanes), saturating the unsigned result at both ends; thin wrapper over `vsqadd_u32_`.
471
+ #[ inline]
472
+ #[ target_feature( enable = "neon" ) ]
473
+ #[ cfg_attr( test, assert_instr( usqadd) ) ]
474
+ pub unsafe fn vsqadd_u32 ( a : uint32x2_t , b : int32x2_t ) -> uint32x2_t {
475
+ vsqadd_u32_ ( a, b)
476
+ }
477
+ /// Unsigned saturating Accumulate of Signed value. Adds each signed lane of `b` to the matching unsigned lane of `a` (4 x 32-bit lanes), saturating the unsigned result at both ends; thin wrapper over `vsqaddq_u32_`.
478
+ #[ inline]
479
+ #[ target_feature( enable = "neon" ) ]
480
+ #[ cfg_attr( test, assert_instr( usqadd) ) ]
481
+ pub unsafe fn vsqaddq_u32 ( a : uint32x4_t , b : int32x4_t ) -> uint32x4_t {
482
+ vsqaddq_u32_ ( a, b)
483
+ }
484
+ /// Unsigned saturating Accumulate of Signed value. Adds the signed lane of `b` to the unsigned lane of `a` (1 x 64-bit lane), saturating the unsigned result; thin wrapper over `vsqadd_u64_`.
485
+ #[ inline]
486
+ #[ target_feature( enable = "neon" ) ]
487
+ #[ cfg_attr( test, assert_instr( usqadd) ) ]
488
+ pub unsafe fn vsqadd_u64 ( a : uint64x1_t , b : int64x1_t ) -> uint64x1_t {
489
+ vsqadd_u64_ ( a, b)
490
+ }
491
+ /// Unsigned saturating Accumulate of Signed value. Adds each signed lane of `b` to the matching unsigned lane of `a` (2 x 64-bit lanes), saturating the unsigned result at both ends; thin wrapper over `vsqaddq_u64_`.
492
+ #[ inline]
493
+ #[ target_feature( enable = "neon" ) ]
494
+ #[ cfg_attr( test, assert_instr( usqadd) ) ]
495
+ pub unsafe fn vsqaddq_u64 ( a : uint64x2_t , b : int64x2_t ) -> uint64x2_t {
496
+ vsqaddq_u64_ ( a, b)
497
+ }
498
+
351
499
/// Add pairwise
352
500
#[ inline]
353
501
#[ target_feature( enable = "neon" ) ]
@@ -2328,6 +2476,170 @@ mod tests {
2328
2476
assert_eq ! ( r, e) ;
2329
2477
}
2330
2478
2479
+ #[ simd_test( enable = "neon" ) ]
2480
+ unsafe fn test_vuqadd_s8 ( ) {
2481
+ let a = i8x8:: new ( i8:: MIN , -3 , -2 , -1 , 0 , 1 , 2 , i8:: MAX ) ;
2482
+ let b = u8x8:: new ( u8:: MAX , 1 , 2 , 3 , 4 , 5 , 6 , 7 ) ;
2483
+ let e = i8x8:: new ( i8:: MAX , -2 , 0 , 2 , 4 , 6 , 8 , i8:: MAX ) ;
2484
+ let r: i8x8 = transmute ( vuqadd_s8 ( transmute ( a) , transmute ( b) ) ) ;
2485
+ assert_eq ! ( r, e) ;
2486
+ }
2487
+ #[ simd_test( enable = "neon" ) ]
2488
+ unsafe fn test_vuqaddq_s8 ( ) {
2489
+ let a = i8x16:: new (
2490
+ i8:: MIN ,
2491
+ -7 ,
2492
+ -6 ,
2493
+ -5 ,
2494
+ -4 ,
2495
+ -3 ,
2496
+ -2 ,
2497
+ -1 ,
2498
+ 0 ,
2499
+ 1 ,
2500
+ 2 ,
2501
+ 3 ,
2502
+ 4 ,
2503
+ 5 ,
2504
+ 6 ,
2505
+ i8:: MAX ,
2506
+ ) ;
2507
+ let b = u8x16:: new ( u8:: MAX , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ) ;
2508
+ let e = i8x16:: new (
2509
+ i8:: MAX ,
2510
+ -6 ,
2511
+ -4 ,
2512
+ -2 ,
2513
+ 0 ,
2514
+ 2 ,
2515
+ 4 ,
2516
+ 6 ,
2517
+ 8 ,
2518
+ 10 ,
2519
+ 12 ,
2520
+ 14 ,
2521
+ 16 ,
2522
+ 18 ,
2523
+ 20 ,
2524
+ i8:: MAX ,
2525
+ ) ;
2526
+ let r: i8x16 = transmute ( vuqaddq_s8 ( transmute ( a) , transmute ( b) ) ) ;
2527
+ assert_eq ! ( r, e) ;
2528
+ }
2529
+ #[ simd_test( enable = "neon" ) ]
2530
+ unsafe fn test_vuqadd_s16 ( ) {
2531
+ let a = i16x4:: new ( i16:: MIN , -1 , 0 , i16:: MAX ) ;
2532
+ let b = u16x4:: new ( u16:: MAX , 1 , 2 , 3 ) ;
2533
+ let e = i16x4:: new ( i16:: MAX , 0 , 2 , i16:: MAX ) ;
2534
+ let r: i16x4 = transmute ( vuqadd_s16 ( transmute ( a) , transmute ( b) ) ) ;
2535
+ assert_eq ! ( r, e) ;
2536
+ }
2537
+ #[ simd_test( enable = "neon" ) ]
2538
+ unsafe fn test_vuqaddq_s16 ( ) {
2539
+ let a = i16x8:: new ( i16:: MIN , -3 , -2 , -1 , 0 , 1 , 2 , i16:: MAX ) ;
2540
+ let b = u16x8:: new ( u16:: MAX , 1 , 2 , 3 , 4 , 5 , 6 , 7 ) ;
2541
+ let e = i16x8:: new ( i16:: MAX , -2 , 0 , 2 , 4 , 6 , 8 , i16:: MAX ) ;
2542
+ let r: i16x8 = transmute ( vuqaddq_s16 ( transmute ( a) , transmute ( b) ) ) ;
2543
+ assert_eq ! ( r, e) ;
2544
+ }
2545
+ #[ simd_test( enable = "neon" ) ]
2546
+ unsafe fn test_vuqadd_s32 ( ) {
2547
+ let a = i32x2:: new ( i32:: MIN , i32:: MAX ) ;
2548
+ let b = u32x2:: new ( u32:: MAX , 1 ) ;
2549
+ let e = i32x2:: new ( i32:: MAX , i32:: MAX ) ;
2550
+ let r: i32x2 = transmute ( vuqadd_s32 ( transmute ( a) , transmute ( b) ) ) ;
2551
+ assert_eq ! ( r, e) ;
2552
+ }
2553
+ #[ simd_test( enable = "neon" ) ]
2554
+ unsafe fn test_vuqaddq_s32 ( ) {
2555
+ let a = i32x4:: new ( i32:: MIN , -1 , 0 , i32:: MAX ) ;
2556
+ let b = u32x4:: new ( u32:: MAX , 1 , 2 , 3 ) ;
2557
+ let e = i32x4:: new ( i32:: MAX , 0 , 2 , i32:: MAX ) ;
2558
+ let r: i32x4 = transmute ( vuqaddq_s32 ( transmute ( a) , transmute ( b) ) ) ;
2559
+ assert_eq ! ( r, e) ;
2560
+ }
2561
+ #[ simd_test( enable = "neon" ) ]
2562
+ unsafe fn test_vuqadd_s64 ( ) {
2563
+ let a = i64x1:: new ( i64:: MIN ) ;
2564
+ let b = u64x1:: new ( u64:: MAX ) ;
2565
+ let e = i64x1:: new ( i64:: MAX ) ;
2566
+ let r: i64x1 = transmute ( vuqadd_s64 ( transmute ( a) , transmute ( b) ) ) ;
2567
+ assert_eq ! ( r, e) ;
2568
+ }
2569
+ #[ simd_test( enable = "neon" ) ]
2570
+ unsafe fn test_vuqaddq_s64 ( ) {
2571
+ let a = i64x2:: new ( i64:: MIN , i64:: MAX ) ;
2572
+ let b = u64x2:: new ( u64:: MAX , 1 ) ;
2573
+ let e = i64x2:: new ( i64:: MAX , i64:: MAX ) ;
2574
+ let r: i64x2 = transmute ( vuqaddq_s64 ( transmute ( a) , transmute ( b) ) ) ;
2575
+ assert_eq ! ( r, e) ;
2576
+ }
2577
+
2578
+ #[ simd_test( enable = "neon" ) ]
2579
+ unsafe fn test_vsqadd_u8 ( ) {
2580
+ let a = u8x8:: new ( 0 , 1 , 2 , 3 , 4 , 5 , 6 , u8:: MAX ) ;
2581
+ let b = i8x8:: new ( i8:: MIN , -3 , -2 , -1 , 0 , 1 , 2 , 3 ) ;
2582
+ let e = u8x8:: new ( 0 , 0 , 0 , 2 , 4 , 6 , 8 , u8:: MAX ) ;
2583
+ let r: u8x8 = transmute ( vsqadd_u8 ( transmute ( a) , transmute ( b) ) ) ;
2584
+ assert_eq ! ( r, e) ;
2585
+ }
2586
+ #[ simd_test( enable = "neon" ) ]
2587
+ unsafe fn test_vsqaddq_u8 ( ) {
2588
+ let a = u8x16:: new ( 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , u8:: MAX ) ;
2589
+ let b = i8x16:: new ( i8:: MIN , -7 , -6 , -5 , -4 , -3 , -2 , -1 , 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ) ;
2590
+ let e = u8x16:: new ( 0 , 0 , 0 , 0 , 0 , 2 , 4 , 6 , 8 , 10 , 12 , 14 , 16 , 18 , 20 , u8:: MAX ) ;
2591
+ let r: u8x16 = transmute ( vsqaddq_u8 ( transmute ( a) , transmute ( b) ) ) ;
2592
+ assert_eq ! ( r, e) ;
2593
+ }
2594
+ #[ simd_test( enable = "neon" ) ]
2595
+ unsafe fn test_vsqadd_u16 ( ) {
2596
+ let a = u16x4:: new ( 0 , 1 , 2 , u16:: MAX ) ;
2597
+ let b = i16x4:: new ( i16:: MIN , -1 , 0 , 1 ) ;
2598
+ let e = u16x4:: new ( 0 , 0 , 2 , u16:: MAX ) ;
2599
+ let r: u16x4 = transmute ( vsqadd_u16 ( transmute ( a) , transmute ( b) ) ) ;
2600
+ assert_eq ! ( r, e) ;
2601
+ }
2602
+ #[ simd_test( enable = "neon" ) ]
2603
+ unsafe fn test_vsqaddq_u16 ( ) {
2604
+ let a = u16x8:: new ( 0 , 1 , 2 , 3 , 4 , 5 , 6 , u16:: MAX ) ;
2605
+ let b = i16x8:: new ( i16:: MIN , -3 , -2 , -1 , 0 , 1 , 2 , 3 ) ;
2606
+ let e = u16x8:: new ( 0 , 0 , 0 , 2 , 4 , 6 , 8 , u16:: MAX ) ;
2607
+ let r: u16x8 = transmute ( vsqaddq_u16 ( transmute ( a) , transmute ( b) ) ) ;
2608
+ assert_eq ! ( r, e) ;
2609
+ }
2610
+ #[ simd_test( enable = "neon" ) ]
2611
+ unsafe fn test_vsqadd_u32 ( ) {
2612
+ let a = u32x2:: new ( 0 , u32:: MAX ) ;
2613
+ let b = i32x2:: new ( i32:: MIN , 1 ) ;
2614
+ let e = u32x2:: new ( 0 , u32:: MAX ) ;
2615
+ let r: u32x2 = transmute ( vsqadd_u32 ( transmute ( a) , transmute ( b) ) ) ;
2616
+ assert_eq ! ( r, e) ;
2617
+ }
2618
+ #[ simd_test( enable = "neon" ) ]
2619
+ unsafe fn test_vsqaddq_u32 ( ) {
2620
+ let a = u32x4:: new ( 0 , 1 , 2 , u32:: MAX ) ;
2621
+ let b = i32x4:: new ( i32:: MIN , -1 , 0 , 1 ) ;
2622
+ let e = u32x4:: new ( 0 , 0 , 2 , u32:: MAX ) ;
2623
+ let r: u32x4 = transmute ( vsqaddq_u32 ( transmute ( a) , transmute ( b) ) ) ;
2624
+ assert_eq ! ( r, e) ;
2625
+ }
2626
+ #[ simd_test( enable = "neon" ) ]
2627
+ unsafe fn test_vsqadd_u64 ( ) {
2628
+ let a = u64x1:: new ( 0 ) ;
2629
+ let b = i64x1:: new ( i64:: MIN ) ;
2630
+ let e = u64x1:: new ( 0 ) ;
2631
+ let r: u64x1 = transmute ( vsqadd_u64 ( transmute ( a) , transmute ( b) ) ) ;
2632
+ assert_eq ! ( r, e) ;
2633
+ }
2634
+ #[ simd_test( enable = "neon" ) ]
2635
+ unsafe fn test_vsqaddq_u64 ( ) {
2636
+ let a = u64x2:: new ( 0 , u64:: MAX ) ;
2637
+ let b = i64x2:: new ( i64:: MIN , 1 ) ;
2638
+ let e = u64x2:: new ( 0 , u64:: MAX ) ;
2639
+ let r: u64x2 = transmute ( vsqaddq_u64 ( transmute ( a) , transmute ( b) ) ) ;
2640
+ assert_eq ! ( r, e) ;
2641
+ }
2642
+
2331
2643
#[ simd_test( enable = "neon" ) ]
2332
2644
unsafe fn test_vpaddq_s16 ( ) {
2333
2645
let a = i16x8:: new ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ) ;
0 commit comments