@@ -965,6 +965,185 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
965
965
transmute ( i64x8:: splat ( a) )
966
966
}
967
967
968
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector.
969
+ ///
970
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_ps)
971
+ #[ inline]
972
+ #[ target_feature( enable = "avx512f" ) ]
973
+ #[ cfg_attr( test, assert_instr( vcmp) ) ]
974
+ pub unsafe fn _mm512_cmplt_ps_mask ( a : __m512 , b : __m512 ) -> __mmask16 {
975
+ _mm512_cmp_ps_mask ( a, b, _CMP_LT_OQ)
976
+ }
977
+
978
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in a mask vector k
979
+ /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
980
+ ///
981
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_ps)
982
+ #[ inline]
983
+ #[ target_feature( enable = "avx512f" ) ]
984
+ #[ cfg_attr( test, assert_instr( vcmp) ) ]
985
+ pub unsafe fn _mm512_mask_cmplt_ps_mask ( m : __mmask16 , a : __m512 , b : __m512 ) -> __mmask16 {
986
+ _mm512_mask_cmp_ps_mask ( m, a, b, _CMP_LT_OQ)
987
+ }
988
+
989
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector.
990
+ ///
991
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_ps)
992
+ #[ inline]
993
+ #[ target_feature( enable = "avx512f" ) ]
994
+ #[ cfg_attr( test, assert_instr( vcmp) ) ]
995
+ pub unsafe fn _mm512_cmpgt_ps_mask ( a : __m512 , b : __m512 ) -> __mmask16 {
996
+ _mm512_cmp_ps_mask ( a, b, _CMP_GT_OQ)
997
+ }
998
+
999
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than, and store the results in a mask vector k
1000
+ /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
1001
+ ///
1002
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_ps)
1003
+ #[ inline]
1004
+ #[ target_feature( enable = "avx512f" ) ]
1005
+ #[ cfg_attr( test, assert_instr( vcmp) ) ]
1006
+ pub unsafe fn _mm512_mask_cmpgt_ps_mask ( m : __mmask16 , a : __m512 , b : __m512 ) -> __mmask16 {
1007
+ _mm512_mask_cmp_ps_mask ( m, a, b, _CMP_GT_OQ)
1008
+ }
1009
+
1010
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector.
1011
+ ///
1012
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_ps)
1013
+ #[ inline]
1014
+ #[ target_feature( enable = "avx512f" ) ]
1015
+ #[ cfg_attr( test, assert_instr( vcmp) ) ]
1016
+ pub unsafe fn _mm512_cmple_ps_mask ( a : __m512 , b : __m512 ) -> __mmask16 {
1017
+ _mm512_cmp_ps_mask ( a, b, _CMP_LE_OQ)
1018
+ }
1019
+
1020
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in a mask vector k
1021
+ /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
1022
+ ///
1023
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_ps)
1024
+ #[ inline]
1025
+ #[ target_feature( enable = "avx512f" ) ]
1026
+ #[ cfg_attr( test, assert_instr( vcmp) ) ]
1027
+ pub unsafe fn _mm512_mask_cmple_ps_mask ( m : __mmask16 , a : __m512 , b : __m512 ) -> __mmask16 {
1028
+ _mm512_mask_cmp_ps_mask ( m, a, b, _CMP_LE_OQ)
1029
+ }
1030
+
1031
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than-or-equal, and store the results in a mask vector.
1032
+ ///
1033
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_ps)
1034
+ #[ inline]
1035
+ #[ target_feature( enable = "avx512f" ) ]
1036
+ #[ cfg_attr( test, assert_instr( vcmp) ) ]
1037
+ pub unsafe fn _mm512_cmpge_ps_mask ( a : __m512 , b : __m512 ) -> __mmask16 {
1038
+ _mm512_cmp_ps_mask ( a, b, _CMP_LT_OQ)
1039
+ }
1040
+
1041
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than-or-equal, and store the results in a mask vector k
1042
+ /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
1043
+ ///
1044
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_ps)
1045
+ #[ inline]
1046
+ #[ target_feature( enable = "avx512f" ) ]
1047
+ #[ cfg_attr( test, assert_instr( vcmp) ) ]
1048
+ pub unsafe fn _mm512_mask_cmpge_ps_mask ( m : __mmask16 , a : __m512 , b : __m512 ) -> __mmask16 {
1049
+ _mm512_mask_cmp_ps_mask ( m, a, b, _CMP_LT_OQ)
1050
+ }
1051
+
1052
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector.
1053
+ ///
1054
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_ps)
1055
+ #[ inline]
1056
+ #[ target_feature( enable = "avx512f" ) ]
1057
+ #[ cfg_attr( test, assert_instr( vcmp) ) ]
1058
+ pub unsafe fn _mm512_cmpeq_ps_mask ( a : __m512 , b : __m512 ) -> __mmask16 {
1059
+ _mm512_cmp_ps_mask ( a, b, _CMP_EQ_OQ)
1060
+ }
1061
+
1062
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in a mask vector k
1063
+ /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
1064
+ ///
1065
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_ps)
1066
+ #[ inline]
1067
+ #[ target_feature( enable = "avx512f" ) ]
1068
+ #[ cfg_attr( test, assert_instr( vcmp) ) ]
1069
+ pub unsafe fn _mm512_mask_cmpeq_ps_mask ( m : __mmask16 , a : __m512 , b : __m512 ) -> __mmask16 {
1070
+ _mm512_mask_cmp_ps_mask ( m, a, b, _CMP_EQ_OQ)
1071
+ }
1072
+
1073
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
1074
+ ///
1075
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_ps)
1076
+ #[ inline]
1077
+ #[ target_feature( enable = "avx512f" ) ]
1078
+ #[ cfg_attr( test, assert_instr( vcmp) ) ]
1079
+ pub unsafe fn _mm512_cmpneq_ps_mask ( a : __m512 , b : __m512 ) -> __mmask16 {
1080
+ _mm512_cmp_ps_mask ( a, b, _CMP_NEQ_OQ)
1081
+ }
1082
+
1083
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
1084
+ /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
1085
+ ///
1086
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_ps_mask)
1087
+ #[ inline]
1088
+ #[ target_feature( enable = "avx512f" ) ]
1089
+ #[ cfg_attr( test, assert_instr( vcmp) ) ]
1090
+ pub unsafe fn _mm512_mask_cmpneq_ps_mask ( m : __mmask16 , a : __m512 , b : __m512 ) -> __mmask16 {
1091
+ _mm512_mask_cmp_ps_mask ( m, a, b, _CMP_NEQ_OQ)
1092
+ }
1093
+
1094
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
1095
+ ///
1096
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
1097
+ #[ inline]
1098
+ #[ target_feature( enable = "avx512f" ) ]
1099
+ #[ rustc_args_required_const( 2 ) ]
1100
+ #[ cfg_attr( test, assert_instr( vcmp, op = 0 ) ) ]
1101
+ pub unsafe fn _mm512_cmp_ps_mask ( a : __m512 , b : __m512 , op : _MM_CMPINT_ENUM ) -> __mmask16 {
1102
+ let neg_one = -1 ;
1103
+ macro_rules! call {
1104
+ ( $imm5: expr) => {
1105
+ vcmpps(
1106
+ a. as_f32x16( ) ,
1107
+ b. as_f32x16( ) ,
1108
+ $imm5,
1109
+ neg_one,
1110
+ _MM_FROUND_NINT,
1111
+ )
1112
+ } ;
1113
+ }
1114
+ let r = constify_imm5 ! ( op, call) ;
1115
+ transmute ( r)
1116
+ }
1117
+
1118
+ /// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
1119
+ /// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
1120
+ ///
1121
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
1122
+ #[ inline]
1123
+ #[ target_feature( enable = "avx512f" ) ]
1124
+ #[ rustc_args_required_const( 3 ) ]
1125
+ #[ cfg_attr( test, assert_instr( vcmp, op = 0 ) ) ]
1126
+ pub unsafe fn _mm512_mask_cmp_ps_mask (
1127
+ m : __mmask16 ,
1128
+ a : __m512 ,
1129
+ b : __m512 ,
1130
+ op : _MM_CMPINT_ENUM ,
1131
+ ) -> __mmask16 {
1132
+ macro_rules! call {
1133
+ ( $imm5: expr) => {
1134
+ vcmpps(
1135
+ a. as_f32x16( ) ,
1136
+ b. as_f32x16( ) ,
1137
+ $imm5,
1138
+ m as i16 ,
1139
+ _MM_FROUND_NINT,
1140
+ )
1141
+ } ;
1142
+ }
1143
+ let r = constify_imm5 ! ( op, call) ;
1144
+ transmute :: < _ , __mmask16 > ( r) & m
1145
+ }
1146
+
968
1147
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
969
1148
///
970
1149
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32)
@@ -1686,6 +1865,8 @@ extern "C" {
1686
1865
#[ link_name = "llvm.x86.avx512.scatter.qpi.512" ]
1687
1866
fn vpscatterqd ( slice : * mut i8 , mask : i8 , offsets : i64x8 , src : i32x8 , scale : i32 ) ;
1688
1867
1868
+ #[ link_name = "llvm.x86.avx512.mask.cmp.ps.512" ]
1869
+ fn vcmpps ( a : f32x16 , b : f32x16 , op : i32 , m : i16 , sae : i32 ) -> i16 ;
1689
1870
#[ link_name = "llvm.x86.avx512.mask.ucmp.q.512" ]
1690
1871
fn vpcmpuq ( a : i64x8 , b : i64x8 , op : i32 , m : i8 ) -> i8 ;
1691
1872
#[ link_name = "llvm.x86.avx512.mask.cmp.q.512" ]
0 commit comments