Skip to content

Commit 16386ae

Browse files
author
Daniel Smith
committed
Reference code that is not compiling
1 parent 6b389b4 commit 16386ae

File tree

1 file changed

+181
-0
lines changed

1 file changed

+181
-0
lines changed

crates/core_arch/src/x86/avx512f.rs

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -965,6 +965,185 @@ pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
965965
transmute(i64x8::splat(a))
966966
}
967967

968+
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b` for less-than, and store the results in a mask vector.
969+
///
/// Forwards to `_mm512_cmp_ps_mask` with the `_CMP_LT_OQ` predicate.
///
970+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_ps)
971+
#[inline]
972+
#[target_feature(enable = "avx512f")]
973+
#[cfg_attr(test, assert_instr(vcmp))]
974+
pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
975+
_mm512_cmp_ps_mask(a, b, _CMP_LT_OQ)
976+
}
977+
978+
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b` for less-than, and store the results in a mask vector
979+
/// using zeromask `m` (result bits are zeroed out when the corresponding bit of `m` is not set).
980+
///
/// Forwards to `_mm512_mask_cmp_ps_mask` with `m` and the `_CMP_LT_OQ` predicate.
///
981+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmplt_ps)
982+
#[inline]
983+
#[target_feature(enable = "avx512f")]
984+
#[cfg_attr(test, assert_instr(vcmp))]
985+
pub unsafe fn _mm512_mask_cmplt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
986+
_mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OQ)
987+
}
988+
989+
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b` for greater-than, and store the results in a mask vector.
990+
///
/// Forwards to `_mm512_cmp_ps_mask` with the `_CMP_GT_OQ` predicate.
///
991+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpgt_ps)
992+
#[inline]
993+
#[target_feature(enable = "avx512f")]
994+
#[cfg_attr(test, assert_instr(vcmp))]
995+
pub unsafe fn _mm512_cmpgt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
996+
_mm512_cmp_ps_mask(a, b, _CMP_GT_OQ)
997+
}
998+
999+
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b` for greater-than, and store the results in a mask vector
1000+
/// using zeromask `m` (result bits are zeroed out when the corresponding bit of `m` is not set).
1001+
///
/// Forwards to `_mm512_mask_cmp_ps_mask` with `m` and the `_CMP_GT_OQ` predicate.
///
1002+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpgt_ps)
1003+
#[inline]
1004+
#[target_feature(enable = "avx512f")]
1005+
#[cfg_attr(test, assert_instr(vcmp))]
1006+
pub unsafe fn _mm512_mask_cmpgt_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
1007+
_mm512_mask_cmp_ps_mask(m, a, b, _CMP_GT_OQ)
1008+
}
1009+
1010+
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b` for less-than-or-equal, and store the results in a mask vector.
1011+
///
/// Forwards to `_mm512_cmp_ps_mask` with the `_CMP_LE_OQ` predicate.
///
1012+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmple_ps)
1013+
#[inline]
1014+
#[target_feature(enable = "avx512f")]
1015+
#[cfg_attr(test, assert_instr(vcmp))]
1016+
pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
1017+
_mm512_cmp_ps_mask(a, b, _CMP_LE_OQ)
1018+
}
1019+
1020+
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b` for less-than-or-equal, and store the results in a mask vector
1021+
/// using zeromask `m` (result bits are zeroed out when the corresponding bit of `m` is not set).
1022+
///
/// Forwards to `_mm512_mask_cmp_ps_mask` with `m` and the `_CMP_LE_OQ` predicate.
///
1023+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmple_ps)
1024+
#[inline]
1025+
#[target_feature(enable = "avx512f")]
1026+
#[cfg_attr(test, assert_instr(vcmp))]
1027+
pub unsafe fn _mm512_mask_cmple_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
1028+
_mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OQ)
1029+
}
1030+
1031+
/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than-or-equal, and store the results in a mask vector.
1032+
///
1033+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpge_ps)
1034+
#[inline]
1035+
#[target_feature(enable = "avx512f")]
1036+
#[cfg_attr(test, assert_instr(vcmp))]
1037+
pub unsafe fn _mm512_cmpge_ps_mask(a: __m512, b: __m512) -> __mmask16 {
1038+
_mm512_cmp_ps_mask(a, b, _CMP_LT_OQ)
1039+
}
1040+
1041+
/// Compare packed single-precision (32-bit) floating-point elements in a and b for greater-than-or-equal, and store the results in a mask vector k
1042+
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
1043+
///
1044+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpge_ps)
1045+
#[inline]
1046+
#[target_feature(enable = "avx512f")]
1047+
#[cfg_attr(test, assert_instr(vcmp))]
1048+
pub unsafe fn _mm512_mask_cmpge_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
1049+
_mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OQ)
1050+
}
1051+
1052+
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b` for equality, and store the results in a mask vector.
1053+
///
/// Forwards to `_mm512_cmp_ps_mask` with the `_CMP_EQ_OQ` predicate.
///
1054+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpeq_ps)
1055+
#[inline]
1056+
#[target_feature(enable = "avx512f")]
1057+
#[cfg_attr(test, assert_instr(vcmp))]
1058+
pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
1059+
_mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
1060+
}
1061+
1062+
/// Compare packed single-precision (32-bit) floating-point elements in `a` and `b` for equality, and store the results in a mask vector
1063+
/// using zeromask `m` (result bits are zeroed out when the corresponding bit of `m` is not set).
1064+
///
/// Forwards to `_mm512_mask_cmp_ps_mask` with `m` and the `_CMP_EQ_OQ` predicate.
///
1065+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpeq_ps)
1066+
#[inline]
1067+
#[target_feature(enable = "avx512f")]
1068+
#[cfg_attr(test, assert_instr(vcmp))]
1069+
pub unsafe fn _mm512_mask_cmpeq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
1070+
_mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ)
1071+
}
1072+
1073+
/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector.
1074+
///
1075+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmpneq_ps)
1076+
#[inline]
1077+
#[target_feature(enable = "avx512f")]
1078+
#[cfg_attr(test, assert_instr(vcmp))]
1079+
pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
1080+
_mm512_cmp_ps_mask(a, b, _CMP_NEQ_OQ)
1081+
}
1082+
1083+
/// Compare packed single-precision (32-bit) floating-point elements in a and b for inequality, and store the results in a mask vector k
1084+
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
1085+
///
1086+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmpneq_ps_mask)
1087+
#[inline]
1088+
#[target_feature(enable = "avx512f")]
1089+
#[cfg_attr(test, assert_instr(vcmp))]
1090+
pub unsafe fn _mm512_mask_cmpneq_ps_mask(m: __mmask16, a: __m512, b: __m512) -> __mmask16 {
1091+
_mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OQ)
1092+
}
1093+
1094+
/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op.
1095+
///
1096+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
1097+
#[inline]
1098+
#[target_feature(enable = "avx512f")]
1099+
#[rustc_args_required_const(2)]
1100+
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
1101+
pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, op: _MM_CMPINT_ENUM) -> __mmask16 {
1102+
let neg_one = -1;
1103+
macro_rules! call {
1104+
($imm5:expr) => {
1105+
vcmpps(
1106+
a.as_f32x16(),
1107+
b.as_f32x16(),
1108+
$imm5,
1109+
neg_one,
1110+
_MM_FROUND_NINT,
1111+
)
1112+
};
1113+
}
1114+
let r = constify_imm5!(op, call);
1115+
transmute(r)
1116+
}
1117+
1118+
/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by op,
1119+
/// using zeromask m (elements are zeroed out when the corresponding mask bit is not set).
1120+
///
1121+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,1063&text=_mm512_mask_cmp_ps_mask)
1122+
#[inline]
1123+
#[target_feature(enable = "avx512f")]
1124+
#[rustc_args_required_const(3)]
1125+
#[cfg_attr(test, assert_instr(vcmp, op = 0))]
1126+
pub unsafe fn _mm512_mask_cmp_ps_mask(
1127+
m: __mmask16,
1128+
a: __m512,
1129+
b: __m512,
1130+
op: _MM_CMPINT_ENUM,
1131+
) -> __mmask16 {
1132+
macro_rules! call {
1133+
($imm5:expr) => {
1134+
vcmpps(
1135+
a.as_f32x16(),
1136+
b.as_f32x16(),
1137+
$imm5,
1138+
m as i16,
1139+
_MM_FROUND_NINT,
1140+
)
1141+
};
1142+
}
1143+
let r = constify_imm5!(op, call);
1144+
transmute::<_, __mmask16>(r) & m
1145+
}
1146+
9681147
/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in a mask vector.
9691148
///
9701149
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062&text=_mm512_cmplt_epu32)
@@ -1686,6 +1865,8 @@ extern "C" {
16861865
#[link_name = "llvm.x86.avx512.scatter.qpi.512"]
16871866
fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
16881867

1868+
// Binding called by `_mm512_cmp_ps_mask` and `_mm512_mask_cmp_ps_mask`: takes the two
// input vectors, the comparison predicate `op`, the lane mask `m` and a trailing `sae`
// control word, and returns the result mask as an `i16`.
#[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
1869+
fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
16891870
#[link_name = "llvm.x86.avx512.mask.ucmp.q.512"]
16901871
fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
16911872
#[link_name = "llvm.x86.avx512.mask.cmp.q.512"]

0 commit comments

Comments
 (0)