@@ -383,6 +383,279 @@ pub unsafe fn _mm512_mask_i64gather_epi32(
383
383
transmute ( r)
384
384
}
385
385
386
/// Scatter double-precision (64-bit) floating-point elements from src into memory using 32-bit indices.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i32scatter_pd(slice: *mut u8, offsets: __m256i, src: __m512d, scale: i32) {
    let src = src.as_f64x8();
    // -1 narrows to an all-ones i8 mask: every lane is stored.
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x8();
    // `scale` must reach the instruction as an immediate, so the constify
    // macro expands one call per possible value.
    macro_rules! call {
        ($imm8:expr) => {
            vscatterdpd(slice, neg_one, offsets, src, $imm8)
        };
    }
    constify_imm8_gather!(scale, call);
}
+ /// Scatter double-precision (64-bit) floating-point elements from src into memory using 32-bit indices.
407
+ ///
408
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_pd)
409
+ #[ inline]
410
+ #[ target_feature( enable = "avx512f" ) ]
411
+ #[ cfg_attr( test, assert_instr( vscatterdpd, scale = 1 ) ) ]
412
+ #[ rustc_args_required_const( 4 ) ]
413
+ pub unsafe fn _mm512_mask_i32scatter_pd (
414
+ slice : * mut u8 ,
415
+ mask : __mmask8 ,
416
+ offsets : __m256i ,
417
+ src : __m512d ,
418
+ scale : i32 ,
419
+ ) {
420
+ let src = src. as_f64x8 ( ) ;
421
+ let slice = slice as * mut i8 ;
422
+ let offsets = offsets. as_i32x8 ( ) ;
423
+ macro_rules! call {
424
+ ( $imm8: expr) => {
425
+ vscatterdpd( slice, mask as i8 , offsets, src, $imm8)
426
+ } ;
427
+ }
428
+ constify_imm8_gather ! ( scale, call) ;
429
+ }
430
+
431
+ /// Scatter double-precision (64-bit) floating-point elements from src into memory using 64-bit indices.
432
+ ///
433
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_pd)
434
+ #[ inline]
435
+ #[ target_feature( enable = "avx512f" ) ]
436
+ #[ cfg_attr( test, assert_instr( vscatterqpd, scale = 1 ) ) ]
437
+ #[ rustc_args_required_const( 3 ) ]
438
+ pub unsafe fn _mm512_i64scatter_pd ( slice : * mut u8 , offsets : __m512i , src : __m512d , scale : i32 ) {
439
+ let src = src. as_f64x8 ( ) ;
440
+ let neg_one = -1 ;
441
+ let slice = slice as * mut i8 ;
442
+ let offsets = offsets. as_i64x8 ( ) ;
443
+ macro_rules! call {
444
+ ( $imm8: expr) => {
445
+ vscatterqpd( slice, neg_one, offsets, src, $imm8)
446
+ } ;
447
+ }
448
+ constify_imm8_gather ! ( scale, call) ;
449
+ }
450
+
451
+ /// Scatter double-precision (64-bit) floating-point elements from src into memory using 64-bit indices.
452
+ ///
453
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_pd)
454
+ #[ inline]
455
+ #[ target_feature( enable = "avx512f" ) ]
456
+ #[ cfg_attr( test, assert_instr( vscatterqpd, scale = 1 ) ) ]
457
+ #[ rustc_args_required_const( 4 ) ]
458
+ pub unsafe fn _mm512_mask_i64scatter_pd (
459
+ slice : * mut u8 ,
460
+ mask : __mmask8 ,
461
+ offsets : __m512i ,
462
+ src : __m512d ,
463
+ scale : i32 ,
464
+ ) {
465
+ let src = src. as_f64x8 ( ) ;
466
+ let slice = slice as * mut i8 ;
467
+ let offsets = offsets. as_i64x8 ( ) ;
468
+ macro_rules! call {
469
+ ( $imm8: expr) => {
470
+ vscatterqpd( slice, mask as i8 , offsets, src, $imm8)
471
+ } ;
472
+ }
473
+ constify_imm8_gather ! ( scale, call) ;
474
+ }
475
+
476
+ /// Scatter single-precision (32-bit) floating-point elements from src into memory using 64-bit indices.
477
+ ///
478
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_ps)
479
+ #[ inline]
480
+ #[ target_feature( enable = "avx512f" ) ]
481
+ #[ cfg_attr( test, assert_instr( vscatterqps, scale = 1 ) ) ]
482
+ #[ rustc_args_required_const( 3 ) ]
483
+ pub unsafe fn _mm512_i64scatter_ps ( slice : * mut u8 , offsets : __m512i , src : __m256 , scale : i32 ) {
484
+ let src = src. as_f32x8 ( ) ;
485
+ let neg_one = -1 ;
486
+ let slice = slice as * mut i8 ;
487
+ let offsets = offsets. as_i64x8 ( ) ;
488
+ macro_rules! call {
489
+ ( $imm8: expr) => {
490
+ vscatterqps( slice, neg_one, offsets, src, $imm8)
491
+ } ;
492
+ }
493
+ constify_imm8_gather ! ( scale, call) ;
494
+ }
495
+
496
+ /// Scatter single-precision (32-bit) floating-point elements from src into memory using 64-bit indices.
497
+ ///
498
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_ps)
499
+ #[ inline]
500
+ #[ target_feature( enable = "avx512f" ) ]
501
+ #[ cfg_attr( test, assert_instr( vscatterqps, scale = 1 ) ) ]
502
+ #[ rustc_args_required_const( 4 ) ]
503
+ pub unsafe fn _mm512_mask_i64scatter_ps (
504
+ slice : * mut u8 ,
505
+ mask : __mmask8 ,
506
+ offsets : __m512i ,
507
+ src : __m256 ,
508
+ scale : i32 ,
509
+ ) {
510
+ let src = src. as_f32x8 ( ) ;
511
+ let slice = slice as * mut i8 ;
512
+ let offsets = offsets. as_i64x8 ( ) ;
513
+ macro_rules! call {
514
+ ( $imm8: expr) => {
515
+ vscatterqps( slice, mask as i8 , offsets, src, $imm8)
516
+ } ;
517
+ }
518
+ constify_imm8_gather ! ( scale, call) ;
519
+ }
520
+
521
+ /// Scatter 64-bit integers from src into memory using 32-bit indices.
522
+ ///
523
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_epi64)
524
+ #[ inline]
525
+ #[ target_feature( enable = "avx512f" ) ]
526
+ #[ cfg_attr( test, assert_instr( vpscatterdq, scale = 1 ) ) ]
527
+ #[ rustc_args_required_const( 3 ) ]
528
+ pub unsafe fn _mm512_i32scatter_epi64 ( slice : * mut u8 , offsets : __m256i , src : __m512i , scale : i32 ) {
529
+ let src = src. as_i64x8 ( ) ;
530
+ let neg_one = -1 ;
531
+ let slice = slice as * mut i8 ;
532
+ let offsets = offsets. as_i32x8 ( ) ;
533
+ macro_rules! call {
534
+ ( $imm8: expr) => {
535
+ vpscatterdq( slice, neg_one, offsets, src, $imm8)
536
+ } ;
537
+ }
538
+ constify_imm8_gather ! ( scale, call) ;
539
+ }
540
+
541
+ /// Scatter 64-bit integers from src into memory using 32-bit indices.
542
+ ///
543
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_epi64)
544
+ #[ inline]
545
+ #[ target_feature( enable = "avx512f" ) ]
546
+ #[ cfg_attr( test, assert_instr( vpscatterdq, scale = 1 ) ) ]
547
+ #[ rustc_args_required_const( 4 ) ]
548
+ pub unsafe fn _mm512_mask_i32scatter_epi64 (
549
+ slice : * mut u8 ,
550
+ mask : __mmask8 ,
551
+ offsets : __m256i ,
552
+ src : __m512i ,
553
+ scale : i32 ,
554
+ ) {
555
+ let src = src. as_i64x8 ( ) ;
556
+ let mask = mask as i8 ;
557
+ let slice = slice as * mut i8 ;
558
+ let offsets = offsets. as_i32x8 ( ) ;
559
+ macro_rules! call {
560
+ ( $imm8: expr) => {
561
+ vpscatterdq( slice, mask, offsets, src, $imm8)
562
+ } ;
563
+ }
564
+ constify_imm8_gather ! ( scale, call) ;
565
+ }
566
+
567
+ /// Scatter 64-bit integers from src into memory using 64-bit indices.
568
+ ///
569
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_epi64)
570
+ #[ inline]
571
+ #[ target_feature( enable = "avx512f" ) ]
572
+ #[ cfg_attr( test, assert_instr( vpscatterqq, scale = 1 ) ) ]
573
+ #[ rustc_args_required_const( 3 ) ]
574
+ pub unsafe fn _mm512_i64scatter_epi64 ( slice : * mut u8 , offsets : __m512i , src : __m512i , scale : i32 ) {
575
+ let src = src. as_i64x8 ( ) ;
576
+ let neg_one = -1 ;
577
+ let slice = slice as * mut i8 ;
578
+ let offsets = offsets. as_i64x8 ( ) ;
579
+ macro_rules! call {
580
+ ( $imm8: expr) => {
581
+ vpscatterqq( slice, neg_one, offsets, src, $imm8)
582
+ } ;
583
+ }
584
+ constify_imm8_gather ! ( scale, call) ;
585
+ }
586
+
587
+ /// Scatter 64-bit integers from src into memory using 64-bit indices.
588
+ ///
589
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_epi64)
590
+ #[ inline]
591
+ #[ target_feature( enable = "avx512f" ) ]
592
+ #[ cfg_attr( test, assert_instr( vpscatterqq, scale = 1 ) ) ]
593
+ #[ rustc_args_required_const( 4 ) ]
594
+ pub unsafe fn _mm512_mask_i64scatter_epi64 (
595
+ slice : * mut u8 ,
596
+ mask : __mmask8 ,
597
+ offsets : __m512i ,
598
+ src : __m512i ,
599
+ scale : i32 ,
600
+ ) {
601
+ let src = src. as_i64x8 ( ) ;
602
+ let mask = mask as i8 ;
603
+ let slice = slice as * mut i8 ;
604
+ let offsets = offsets. as_i64x8 ( ) ;
605
+ macro_rules! call {
606
+ ( $imm8: expr) => {
607
+ vpscatterqq( slice, mask, offsets, src, $imm8)
608
+ } ;
609
+ }
610
+ constify_imm8_gather ! ( scale, call) ;
611
+ }
612
+
613
+ /// Scatter 32-bit integers from src into memory using 64-bit indices.
614
+ ///
615
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_epi32)
616
+ #[ inline]
617
+ #[ target_feature( enable = "avx512f" ) ]
618
+ #[ cfg_attr( test, assert_instr( vpscatterqd, scale = 1 ) ) ]
619
+ #[ rustc_args_required_const( 3 ) ]
620
+ pub unsafe fn _mm512_i64scatter_epi32 ( slice : * mut u8 , offsets : __m512i , src : __m256i , scale : i32 ) {
621
+ let src = src. as_i32x8 ( ) ;
622
+ let neg_one = -1 ;
623
+ let slice = slice as * mut i8 ;
624
+ let offsets = offsets. as_i64x8 ( ) ;
625
+ macro_rules! call {
626
+ ( $imm8: expr) => {
627
+ vpscatterqd( slice, neg_one, offsets, src, $imm8)
628
+ } ;
629
+ }
630
+ constify_imm8_gather ! ( scale, call) ;
631
+ }
632
+
633
+ /// Scatter 32-bit integers from src into memory using 64-bit indices.
634
+ ///
635
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i64scatter_epi32)
636
+ #[ inline]
637
+ #[ target_feature( enable = "avx512f" ) ]
638
+ #[ cfg_attr( test, assert_instr( vpscatterqd, scale = 1 ) ) ]
639
+ #[ rustc_args_required_const( 4 ) ]
640
+ pub unsafe fn _mm512_mask_i64scatter_epi32 (
641
+ slice : * mut u8 ,
642
+ mask : __mmask8 ,
643
+ offsets : __m512i ,
644
+ src : __m256i ,
645
+ scale : i32 ,
646
+ ) {
647
+ let src = src. as_i32x8 ( ) ;
648
+ let mask = mask as i8 ;
649
+ let slice = slice as * mut i8 ;
650
+ let offsets = offsets. as_i64x8 ( ) ;
651
+ macro_rules! call {
652
+ ( $imm8: expr) => {
653
+ vpscatterqd( slice, mask, offsets, src, $imm8)
654
+ } ;
655
+ }
656
+ constify_imm8_gather ! ( scale, call) ;
657
+ }
658
+
386
659
#[ allow( improper_ctypes) ]
387
660
extern "C" {
388
661
#[ link_name = "llvm.x86.avx512.gather.dpd.512" ]
@@ -397,6 +670,20 @@ extern "C" {
397
670
fn vpgatherqq ( src : i64x8 , slice : * const i8 , offsets : i64x8 , mask : i8 , scale : i32 ) -> i64x8 ;
398
671
#[ link_name = "llvm.x86.avx512.gather.qpi.512" ]
399
672
fn vpgatherqd ( src : i32x8 , slice : * const i8 , offsets : i64x8 , mask : i8 , scale : i32 ) -> i32x8 ;
673
+
674
+ #[ link_name = "llvm.x86.avx512.scatter.dpd.512" ]
675
+ fn vscatterdpd ( slice : * mut i8 , mask : i8 , offsets : i32x8 , src : f64x8 , scale : i32 ) ;
676
+ #[ link_name = "llvm.x86.avx512.scatter.qpd.512" ]
677
+ fn vscatterqpd ( slice : * mut i8 , mask : i8 , offsets : i64x8 , src : f64x8 , scale : i32 ) ;
678
+ #[ link_name = "llvm.x86.avx512.scatter.qps.512" ]
679
+ fn vscatterqps ( slice : * mut i8 , mask : i8 , offsets : i64x8 , src : f32x8 , scale : i32 ) ;
680
+ #[ link_name = "llvm.x86.avx512.scatter.dpq.512" ]
681
+ fn vpscatterdq ( slice : * mut i8 , mask : i8 , offsets : i32x8 , src : i64x8 , scale : i32 ) ;
682
+ #[ link_name = "llvm.x86.avx512.scatter.qpq.512" ]
683
+ fn vpscatterqq ( slice : * mut i8 , mask : i8 , offsets : i64x8 , src : i64x8 , scale : i32 ) ;
684
+ #[ link_name = "llvm.x86.avx512.scatter.qpi.512" ]
685
+ fn vpscatterqd ( slice : * mut i8 , mask : i8 , offsets : i64x8 , src : i32x8 , scale : i32 ) ;
686
+
400
687
}
401
688
402
689
/// Broadcast 64-bit float `a` to all elements of `dst`.
0 commit comments