@@ -579,6 +579,51 @@ pub unsafe fn _mm512_mask_i64scatter_pd(
579
579
constify_imm8_gather ! ( scale, call) ;
580
580
}
581
581
582
+ /// Scatter single-precision (32-bit) floating-point elements from memory using 32-bit indices.
583
+ ///
584
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32scatter_ps)
585
+ #[ inline]
586
+ #[ target_feature( enable = "avx512f" ) ]
587
+ #[ cfg_attr( test, assert_instr( vscatterdps, scale = 1 ) ) ]
588
+ #[ rustc_args_required_const( 3 ) ]
589
+ pub unsafe fn _mm512_i32scatter_ps ( slice : * mut u8 , offsets : __m512i , src : __m512 , scale : i32 ) {
590
+ let src = src. as_f32x16 ( ) ;
591
+ let neg_one = -1 ;
592
+ let slice = slice as * mut i8 ;
593
+ let offsets = offsets. as_i32x16 ( ) ;
594
+ macro_rules! call {
595
+ ( $imm8: expr) => {
596
+ vscatterdps( slice, neg_one, offsets, src, $imm8)
597
+ } ;
598
+ }
599
+ constify_imm8_gather ! ( scale, call) ;
600
+ }
601
+
602
+ /// Scatter single-precision (32-bit) floating-point elements from src into memory using 32-bit indices.
603
+ ///
604
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_ps)
605
+ #[ inline]
606
+ #[ target_feature( enable = "avx512f" ) ]
607
+ #[ cfg_attr( test, assert_instr( vscatterdps, scale = 1 ) ) ]
608
+ #[ rustc_args_required_const( 4 ) ]
609
+ pub unsafe fn _mm512_mask_i32scatter_ps (
610
+ slice : * mut u8 ,
611
+ mask : __mmask16 ,
612
+ offsets : __m512i ,
613
+ src : __m512 ,
614
+ scale : i32 ,
615
+ ) {
616
+ let src = src. as_f32x16 ( ) ;
617
+ let slice = slice as * mut i8 ;
618
+ let offsets = offsets. as_i32x16 ( ) ;
619
+ macro_rules! call {
620
+ ( $imm8: expr) => {
621
+ vscatterdps( slice, mask as i16 , offsets, src, $imm8)
622
+ } ;
623
+ }
624
+ constify_imm8_gather ! ( scale, call) ;
625
+ }
626
+
582
627
/// Scatter single-precision (32-bit) floating-point elements from src into memory using 64-bit indices.
583
628
///
584
629
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_ps)
@@ -716,6 +761,52 @@ pub unsafe fn _mm512_mask_i64scatter_epi64(
716
761
constify_imm8_gather ! ( scale, call) ;
717
762
}
718
763
764
+ /// Scatter 32-bit integers from src into memory using 32-bit indices.
765
+ ///
766
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_epi32)
767
+ #[ inline]
768
+ #[ target_feature( enable = "avx512f" ) ]
769
+ #[ cfg_attr( test, assert_instr( vpscatterdd, scale = 1 ) ) ]
770
+ #[ rustc_args_required_const( 3 ) ]
771
+ pub unsafe fn _mm512_i32scatter_epi32 ( slice : * mut u8 , offsets : __m512i , src : __m512i , scale : i32 ) {
772
+ let src = src. as_i32x16 ( ) ;
773
+ let neg_one = -1 ;
774
+ let slice = slice as * mut i8 ;
775
+ let offsets = offsets. as_i32x16 ( ) ;
776
+ macro_rules! call {
777
+ ( $imm8: expr) => {
778
+ vpscatterdd( slice, neg_one, offsets, src, $imm8)
779
+ } ;
780
+ }
781
+ constify_imm8_gather ! ( scale, call) ;
782
+ }
783
+
784
+ /// Scatter 32-bit integers from src into memory using 32-bit indices.
785
+ ///
786
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32scatter_epi32)
787
+ #[ inline]
788
+ #[ target_feature( enable = "avx512f" ) ]
789
+ #[ cfg_attr( test, assert_instr( vpscatterdd, scale = 1 ) ) ]
790
+ #[ rustc_args_required_const( 4 ) ]
791
+ pub unsafe fn _mm512_mask_i32scatter_epi32 (
792
+ slice : * mut u8 ,
793
+ mask : __mmask16 ,
794
+ offsets : __m512i ,
795
+ src : __m512i ,
796
+ scale : i32 ,
797
+ ) {
798
+ let src = src. as_i32x16 ( ) ;
799
+ let mask = mask as i16 ;
800
+ let slice = slice as * mut i8 ;
801
+ let offsets = offsets. as_i32x16 ( ) ;
802
+ macro_rules! call {
803
+ ( $imm8: expr) => {
804
+ vpscatterdd( slice, mask, offsets, src, $imm8)
805
+ } ;
806
+ }
807
+ constify_imm8_gather ! ( scale, call) ;
808
+ }
809
+
719
810
/// Scatter 32-bit integers from src into memory using 64-bit indices.
720
811
///
721
812
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i64scatter_epi32)
@@ -1580,6 +1671,8 @@ extern "C" {
1580
1671
1581
1672
#[ link_name = "llvm.x86.avx512.scatter.dpd.512" ]
1582
1673
fn vscatterdpd ( slice : * mut i8 , mask : i8 , offsets : i32x8 , src : f64x8 , scale : i32 ) ;
1674
+ #[ link_name = "llvm.x86.avx512.scatter.dps.512" ]
1675
+ fn vscatterdps ( slice : * mut i8 , mask : i16 , offsets : i32x16 , src : f32x16 , scale : i32 ) ;
1583
1676
#[ link_name = "llvm.x86.avx512.scatter.qpd.512" ]
1584
1677
fn vscatterqpd ( slice : * mut i8 , mask : i8 , offsets : i64x8 , src : f64x8 , scale : i32 ) ;
1585
1678
#[ link_name = "llvm.x86.avx512.scatter.qps.512" ]
@@ -1767,12 +1860,83 @@ mod tests {
1767
1860
let mask = 0b10101010_10101010 ;
1768
1861
#[ rustfmt:: skip]
1769
1862
let index = _mm512_setr_epi32 ( 0 , 16 , 32 , 48 , 64 , 80 , 96 , 112 ,
1770
- 120 , 128 , 136 , 144 , 152 , 160 , 168 , 176 ) ;
1863
+ 128 , 144 , 160 , 176 , 192 , 208 , 224 , 240 ) ;
1771
1864
// A multiplier of 4 is word-addressing
1772
1865
let r = _mm512_mask_i32gather_epi32 ( src, mask, index, arr. as_ptr ( ) as * const u8 , 4 ) ;
1773
1866
#[ rustfmt:: skip]
1774
1867
assert_eq_m512i ( r, _mm512_setr_epi32 ( 2 , 16 , 2 , 48 , 2 , 80 , 2 , 112 ,
1775
- 2 , 128 , 2 , 144 , 2 , 160 , 2 , 176 ) ) ;
1868
+ 2 , 144 , 2 , 176 , 2 , 208 , 2 , 240 ) ) ;
1869
+ }
1870
+
1871
+ #[ simd_test( enable = "avx512f" ) ]
1872
+ unsafe fn test_mm512_i32scatter_ps ( ) {
1873
+ let mut arr = [ 0f32 ; 256 ] ;
1874
+ #[ rustfmt:: skip]
1875
+ let index = _mm512_setr_epi32 ( 0 , 16 , 32 , 48 , 64 , 80 , 96 , 112 ,
1876
+ 128 , 144 , 160 , 176 , 192 , 208 , 224 , 240 ) ;
1877
+ let src = _mm512_setr_ps (
1878
+ 1. , 2. , 3. , 4. , 5. , 6. , 7. , 8. , 9. , 10. , 11. , 12. , 13. , 14. , 15. , 16. ,
1879
+ ) ;
1880
+ // A multiplier of 4 is word-addressing
1881
+ _mm512_i32scatter_ps ( arr. as_mut_ptr ( ) as * mut u8 , index, src, 4 ) ;
1882
+ let mut expected = [ 0f32 ; 256 ] ;
1883
+ for i in 0 ..16 {
1884
+ expected[ i * 16 ] = ( i + 1 ) as f32 ;
1885
+ }
1886
+ assert_eq ! ( & arr[ ..] , & expected[ ..] , ) ;
1887
+ }
1888
+
1889
+ #[ simd_test( enable = "avx512f" ) ]
1890
+ unsafe fn test_mm512_mask_i32scatter_ps ( ) {
1891
+ let mut arr = [ 0f32 ; 256 ] ;
1892
+ let mask = 0b10101010_10101010 ;
1893
+ #[ rustfmt:: skip]
1894
+ let index = _mm512_setr_epi32 ( 0 , 16 , 32 , 48 , 64 , 80 , 96 , 112 ,
1895
+ 128 , 144 , 160 , 176 , 192 , 208 , 224 , 240 ) ;
1896
+ let src = _mm512_setr_ps (
1897
+ 1. , 2. , 3. , 4. , 5. , 6. , 7. , 8. , 9. , 10. , 11. , 12. , 13. , 14. , 15. , 16. ,
1898
+ ) ;
1899
+ // A multiplier of 4 is word-addressing
1900
+ _mm512_mask_i32scatter_ps ( arr. as_mut_ptr ( ) as * mut u8 , mask, index, src, 4 ) ;
1901
+ let mut expected = [ 0f32 ; 256 ] ;
1902
+ for i in 0 ..8 {
1903
+ expected[ i * 32 + 16 ] = 2. * ( i + 1 ) as f32 ;
1904
+ }
1905
+ assert_eq ! ( & arr[ ..] , & expected[ ..] , ) ;
1906
+ }
1907
+
1908
+ #[ simd_test( enable = "avx512f" ) ]
1909
+ unsafe fn test_mm512_i32scatter_epi32 ( ) {
1910
+ let mut arr = [ 0i32 ; 256 ] ;
1911
+ #[ rustfmt:: skip]
1912
+
1913
+ let index = _mm512_setr_epi32 ( 0 , 16 , 32 , 48 , 64 , 80 , 96 , 112 ,
1914
+ 128 , 144 , 160 , 176 , 192 , 208 , 224 , 240 ) ;
1915
+ let src = _mm512_setr_epi32 ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 ) ;
1916
+ // A multiplier of 4 is word-addressing
1917
+ _mm512_i32scatter_epi32 ( arr. as_mut_ptr ( ) as * mut u8 , index, src, 4 ) ;
1918
+ let mut expected = [ 0i32 ; 256 ] ;
1919
+ for i in 0 ..16 {
1920
+ expected[ i * 16 ] = ( i + 1 ) as i32 ;
1921
+ }
1922
+ assert_eq ! ( & arr[ ..] , & expected[ ..] , ) ;
1923
+ }
1924
+
1925
+ #[ simd_test( enable = "avx512f" ) ]
1926
+ unsafe fn test_mm512_mask_i32scatter_epi32 ( ) {
1927
+ let mut arr = [ 0i32 ; 256 ] ;
1928
+ let mask = 0b10101010_10101010 ;
1929
+ #[ rustfmt:: skip]
1930
+ let index = _mm512_setr_epi32 ( 0 , 16 , 32 , 48 , 64 , 80 , 96 , 112 ,
1931
+ 128 , 144 , 160 , 176 , 192 , 208 , 224 , 240 ) ;
1932
+ let src = _mm512_setr_epi32 ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 ) ;
1933
+ // A multiplier of 4 is word-addressing
1934
+ _mm512_mask_i32scatter_epi32 ( arr. as_mut_ptr ( ) as * mut u8 , mask, index, src, 4 ) ;
1935
+ let mut expected = [ 0i32 ; 256 ] ;
1936
+ for i in 0 ..8 {
1937
+ expected[ i * 32 + 16 ] = 2 * ( i + 1 ) as i32 ;
1938
+ }
1939
+ assert_eq ! ( & arr[ ..] , & expected[ ..] , ) ;
1776
1940
}
1777
1941
1778
1942
#[ simd_test( enable = "avx512f" ) ]
0 commit comments