Skip to content

Commit 6d9fd24

Browse files
committed
Auto merge of #3690 - TDecking:adx, r=RalfJung
Implement LLVM x86 adx intrinsics. See title. It also exploits a small opportunity to deduplicate a bit of intrinsics code.
2 parents 51363f5 + dd2bd5b commit 6d9fd24

File tree

2 files changed

+128
-48
lines changed

2 files changed

+128
-48
lines changed

src/tools/miri/src/shims/x86/mod.rs

Lines changed: 58 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -35,63 +35,49 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
3535
// Prefix should have already been checked.
3636
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.").unwrap();
3737
match unprefixed_name {
38-
// Used to implement the `_addcarry_u32` and `_addcarry_u64` functions.
39-
// Computes a + b with input and output carry. The input carry is an 8-bit
40-
// value, which is interpreted as 1 if it is non-zero. The output carry is
41-
// an 8-bit value that will be 0 or 1.
38+
// Used to implement the `_addcarry_u{32, 64}` and the `_subborrow_u{32, 64}` functions.
39+
// Computes a + b or a - b with input and output carry/borrow. The input carry/borrow is an 8-bit
40+
// value, which is interpreted as 1 if it is non-zero. The output carry/borrow is an 8-bit value that will be 0 or 1.
4241
// https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarry-u32-addcarry-u64.html
43-
"addcarry.32" | "addcarry.64" => {
44-
if unprefixed_name == "addcarry.64" && this.tcx.sess.target.arch != "x86_64" {
42+
// https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/subborrow-u32-subborrow-u64.html
43+
"addcarry.32" | "addcarry.64" | "subborrow.32" | "subborrow.64" => {
44+
if unprefixed_name.ends_with("64") && this.tcx.sess.target.arch != "x86_64" {
4545
return Ok(EmulateItemResult::NotSupported);
4646
}
4747

48-
let [c_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
49-
let c_in = this.read_scalar(c_in)?.to_u8()? != 0;
50-
let a = this.read_immediate(a)?;
51-
let b = this.read_immediate(b)?;
52-
53-
let (sum, overflow1) =
54-
this.binary_op(mir::BinOp::AddWithOverflow, &a, &b)?.to_pair(this);
55-
let (sum, overflow2) = this
56-
.binary_op(
57-
mir::BinOp::AddWithOverflow,
58-
&sum,
59-
&ImmTy::from_uint(c_in, a.layout),
60-
)?
61-
.to_pair(this);
62-
let c_out = overflow1.to_scalar().to_bool()? | overflow2.to_scalar().to_bool()?;
63-
64-
this.write_scalar(Scalar::from_u8(c_out.into()), &this.project_field(dest, 0)?)?;
48+
let [cb_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
49+
50+
let op = if unprefixed_name.starts_with("add") {
51+
mir::BinOp::AddWithOverflow
52+
} else {
53+
mir::BinOp::SubWithOverflow
54+
};
55+
56+
let (sum, cb_out) = carrying_add(this, cb_in, a, b, op)?;
57+
this.write_scalar(cb_out, &this.project_field(dest, 0)?)?;
6558
this.write_immediate(*sum, &this.project_field(dest, 1)?)?;
6659
}
67-
// Used to implement the `_subborrow_u32` and `_subborrow_u64` functions.
68-
// Computes a - b with input and output borrow. The input borrow is an 8-bit
69-
// value, which is interpreted as 1 if it is non-zero. The output borrow is
70-
// an 8-bit value that will be 0 or 1.
71-
// https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/subborrow-u32-subborrow-u64.html
72-
"subborrow.32" | "subborrow.64" => {
73-
if unprefixed_name == "subborrow.64" && this.tcx.sess.target.arch != "x86_64" {
60+
61+
// Used to implement the `_addcarryx_u{32, 64}` functions. They are semantically identical with the `_addcarry_u{32, 64}` functions,
62+
// except for a slightly different type signature and the requirement for the "adx" target feature.
63+
// https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarryx-u32-addcarryx-u64.html
64+
"addcarryx.u32" | "addcarryx.u64" => {
65+
this.expect_target_feature_for_intrinsic(link_name, "adx")?;
66+
67+
let is_u64 = unprefixed_name.ends_with("64");
68+
if is_u64 && this.tcx.sess.target.arch != "x86_64" {
7469
return Ok(EmulateItemResult::NotSupported);
7570
}
7671

77-
let [b_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
78-
let b_in = this.read_scalar(b_in)?.to_u8()? != 0;
79-
let a = this.read_immediate(a)?;
80-
let b = this.read_immediate(b)?;
81-
82-
let (sub, overflow1) =
83-
this.binary_op(mir::BinOp::SubWithOverflow, &a, &b)?.to_pair(this);
84-
let (sub, overflow2) = this
85-
.binary_op(
86-
mir::BinOp::SubWithOverflow,
87-
&sub,
88-
&ImmTy::from_uint(b_in, a.layout),
89-
)?
90-
.to_pair(this);
91-
let b_out = overflow1.to_scalar().to_bool()? | overflow2.to_scalar().to_bool()?;
92-
93-
this.write_scalar(Scalar::from_u8(b_out.into()), &this.project_field(dest, 0)?)?;
94-
this.write_immediate(*sub, &this.project_field(dest, 1)?)?;
72+
let [c_in, a, b, out] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
73+
let out = this.deref_pointer_as(
74+
out,
75+
if is_u64 { this.machine.layouts.u64 } else { this.machine.layouts.u32 },
76+
)?;
77+
78+
let (sum, c_out) = carrying_add(this, c_in, a, b, mir::BinOp::AddWithOverflow)?;
79+
this.write_scalar(c_out, dest)?;
80+
this.write_immediate(*sum, &out)?;
9581
}
9682

9783
// Used to implement the `_mm_pause` function.
@@ -1359,3 +1345,27 @@ fn psign<'tcx>(
13591345

13601346
Ok(())
13611347
}
1348+
1349+
/// Calculates either `a + b + cb_in` or `a - b - cb_in` depending on the value
1350+
/// of `op` and returns both the result and the overflow (carry/borrow) bit. `op` is expected to be
1351+
/// either `mir::BinOp::AddWithOverflow` or `mir::BinOp::SubWithOverflow`.
1352+
fn carrying_add<'tcx>(
1353+
this: &mut crate::MiriInterpCx<'tcx>,
1354+
cb_in: &OpTy<'tcx>,
1355+
a: &OpTy<'tcx>,
1356+
b: &OpTy<'tcx>,
1357+
op: mir::BinOp,
1358+
) -> InterpResult<'tcx, (ImmTy<'tcx>, Scalar)> {
1359+
assert!(op == mir::BinOp::AddWithOverflow || op == mir::BinOp::SubWithOverflow);
1360+
1361+
let cb_in = this.read_scalar(cb_in)?.to_u8()? != 0;
1362+
let a = this.read_immediate(a)?;
1363+
let b = this.read_immediate(b)?;
1364+
1365+
let (sum, overflow1) = this.binary_op(op, &a, &b)?.to_pair(this);
1366+
let (sum, overflow2) =
1367+
this.binary_op(op, &sum, &ImmTy::from_uint(cb_in, a.layout))?.to_pair(this);
1368+
let cb_out = overflow1.to_scalar().to_bool()? | overflow2.to_scalar().to_bool()?;
1369+
1370+
Ok((sum, Scalar::from_u8(cb_out.into())))
1371+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// Ignore everything except x86 and x86_64
2+
// Any new targets that are added to CI should be ignored here.
3+
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
4+
//@ignore-target-aarch64
5+
//@ignore-target-arm
6+
//@ignore-target-avr
7+
//@ignore-target-s390x
8+
//@ignore-target-thumbv7em
9+
//@ignore-target-wasm32
10+
//@compile-flags: -C target-feature=+adx
11+
12+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
13+
mod x86 {
14+
#[cfg(target_arch = "x86")]
15+
use core::arch::x86 as arch;
16+
#[cfg(target_arch = "x86_64")]
17+
use core::arch::x86_64 as arch;
18+
19+
fn adc(c_in: u8, a: u32, b: u32) -> (u8, u32) {
20+
let mut sum = 0;
21+
// SAFETY: `_addcarryx_u32` is only unsafe because it requires the `adx` target feature,
22+
// which this test enables via `-C target-feature=+adx` and asserts in the top-level `main`.
23+
let c_out = unsafe { arch::_addcarryx_u32(c_in, a, b, &mut sum) };
24+
(c_out, sum)
25+
}
26+
27+
pub fn main() {
28+
assert_eq!(adc(0, 1, 1), (0, 2));
29+
assert_eq!(adc(1, 1, 1), (0, 3));
30+
assert_eq!(adc(2, 1, 1), (0, 3)); // any non-zero carry acts as 1!
31+
assert_eq!(adc(u8::MAX, 1, 1), (0, 3));
32+
assert_eq!(adc(0, u32::MAX, u32::MAX), (1, u32::MAX - 1));
33+
assert_eq!(adc(1, u32::MAX, u32::MAX), (1, u32::MAX));
34+
assert_eq!(adc(2, u32::MAX, u32::MAX), (1, u32::MAX));
35+
assert_eq!(adc(u8::MAX, u32::MAX, u32::MAX), (1, u32::MAX));
36+
}
37+
}
38+
39+
#[cfg(target_arch = "x86_64")]
40+
mod x86_64 {
41+
use core::arch::x86_64 as arch;
42+
43+
fn adc(c_in: u8, a: u64, b: u64) -> (u8, u64) {
44+
let mut sum = 0;
45+
// SAFETY: `_addcarryx_u64` is only unsafe because it requires the `adx` target feature,
46+
// which this test enables via `-C target-feature=+adx` and asserts in the top-level `main`.
47+
let c_out = unsafe { arch::_addcarryx_u64(c_in, a, b, &mut sum) };
48+
(c_out, sum)
49+
}
50+
51+
pub fn main() {
52+
assert_eq!(adc(0, 1, 1), (0, 2));
53+
assert_eq!(adc(1, 1, 1), (0, 3));
54+
assert_eq!(adc(2, 1, 1), (0, 3)); // any non-zero carry acts as 1!
55+
assert_eq!(adc(u8::MAX, 1, 1), (0, 3));
56+
assert_eq!(adc(0, u64::MAX, u64::MAX), (1, u64::MAX - 1));
57+
assert_eq!(adc(1, u64::MAX, u64::MAX), (1, u64::MAX));
58+
assert_eq!(adc(2, u64::MAX, u64::MAX), (1, u64::MAX));
59+
assert_eq!(adc(u8::MAX, u64::MAX, u64::MAX), (1, u64::MAX));
60+
}
61+
}
62+
63+
fn main() {
64+
assert!(is_x86_feature_detected!("adx"));
65+
66+
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
67+
x86::main();
68+
#[cfg(target_arch = "x86_64")]
69+
x86_64::main();
70+
}

0 commit comments

Comments
 (0)