Auto merge of #3690 - TDecking:adx, r=RalfJung

bors · bors · commit 6d9fd241aa9b · 2024-06-22T15:13:36.000Z
Implement LLVM x86 adx intrinsics

See title. It also explots a small opportunity to deduplicate a bit of intrinsics code.
diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs
@@ -35,63 +35,49 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         // Prefix should have already been checked.
         let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.").unwrap();
         match unprefixed_name {
-            // Used to implement the `_addcarry_u32` and `_addcarry_u64` functions.
-            // Computes a + b with input and output carry. The input carry is an 8-bit
-            // value, which is interpreted as 1 if it is non-zero. The output carry is
-            // an 8-bit value that will be 0 or 1.
+            // Used to implement the `_addcarry_u{32, 64}` and the `_subborrow_u{32, 64}` functions.
+            // Computes a + b or a - b with input and output carry/borrow. The input carry/borrow is an 8-bit
+            // value, which is interpreted as 1 if it is non-zero. The output carry/borrow is an 8-bit value that will be 0 or 1.
             // https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarry-u32-addcarry-u64.html
-            "addcarry.32" | "addcarry.64" => {
-                if unprefixed_name == "addcarry.64" && this.tcx.sess.target.arch != "x86_64" {
+            // https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/subborrow-u32-subborrow-u64.html
+            "addcarry.32" | "addcarry.64" | "subborrow.32" | "subborrow.64" => {
+                if unprefixed_name.ends_with("64") && this.tcx.sess.target.arch != "x86_64" {
                     return Ok(EmulateItemResult::NotSupported);
                 }
 
-                let [c_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
-                let c_in = this.read_scalar(c_in)?.to_u8()? != 0;
-                let a = this.read_immediate(a)?;
-                let b = this.read_immediate(b)?;
-
-                let (sum, overflow1) =
-                    this.binary_op(mir::BinOp::AddWithOverflow, &a, &b)?.to_pair(this);
-                let (sum, overflow2) = this
-                    .binary_op(
-                        mir::BinOp::AddWithOverflow,
-                        &sum,
-                        &ImmTy::from_uint(c_in, a.layout),
-                    )?
-                    .to_pair(this);
-                let c_out = overflow1.to_scalar().to_bool()? | overflow2.to_scalar().to_bool()?;
-
-                this.write_scalar(Scalar::from_u8(c_out.into()), &this.project_field(dest, 0)?)?;
+                let [cb_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
+
+                let op = if unprefixed_name.starts_with("add") {
+                    mir::BinOp::AddWithOverflow
+                } else {
+                    mir::BinOp::SubWithOverflow
+                };
+
+                let (sum, cb_out) = carrying_add(this, cb_in, a, b, op)?;
+                this.write_scalar(cb_out, &this.project_field(dest, 0)?)?;
                 this.write_immediate(*sum, &this.project_field(dest, 1)?)?;
             }
-            // Used to implement the `_subborrow_u32` and `_subborrow_u64` functions.
-            // Computes a - b with input and output borrow. The input borrow is an 8-bit
-            // value, which is interpreted as 1 if it is non-zero. The output borrow is
-            // an 8-bit value that will be 0 or 1.
-            // https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/subborrow-u32-subborrow-u64.html
-            "subborrow.32" | "subborrow.64" => {
-                if unprefixed_name == "subborrow.64" && this.tcx.sess.target.arch != "x86_64" {
+
+            // Used to implement the `_addcarryx_u{32, 64}` functions. They are semantically identical with the `_addcarry_u{32, 64}` functions,
+            // except for a slightly different type signature and the requirement for the "adx" target feature.
+            // https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarryx-u32-addcarryx-u64.html
+            "addcarryx.u32" | "addcarryx.u64" => {
+                this.expect_target_feature_for_intrinsic(link_name, "adx")?;
+
+                let is_u64 = unprefixed_name.ends_with("64");
+                if is_u64 && this.tcx.sess.target.arch != "x86_64" {
                     return Ok(EmulateItemResult::NotSupported);
                 }
 
-                let [b_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
-                let b_in = this.read_scalar(b_in)?.to_u8()? != 0;
-                let a = this.read_immediate(a)?;
-                let b = this.read_immediate(b)?;
-
-                let (sub, overflow1) =
-                    this.binary_op(mir::BinOp::SubWithOverflow, &a, &b)?.to_pair(this);
-                let (sub, overflow2) = this
-                    .binary_op(
-                        mir::BinOp::SubWithOverflow,
-                        &sub,
-                        &ImmTy::from_uint(b_in, a.layout),
-                    )?
-                    .to_pair(this);
-                let b_out = overflow1.to_scalar().to_bool()? | overflow2.to_scalar().to_bool()?;
-
-                this.write_scalar(Scalar::from_u8(b_out.into()), &this.project_field(dest, 0)?)?;
-                this.write_immediate(*sub, &this.project_field(dest, 1)?)?;
+                let [c_in, a, b, out] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
+                let out = this.deref_pointer_as(
+                    out,
+                    if is_u64 { this.machine.layouts.u64 } else { this.machine.layouts.u32 },
+                )?;
+
+                let (sum, c_out) = carrying_add(this, c_in, a, b, mir::BinOp::AddWithOverflow)?;
+                this.write_scalar(c_out, dest)?;
+                this.write_immediate(*sum, &out)?;
             }
 
             // Used to implement the `_mm_pause` function.
@@ -1359,3 +1345,27 @@ fn psign<'tcx>(
 
     Ok(())
 }
+
+/// Calcultates either `a + b + cb_in` or `a - b - cb_in` depending on the value
+/// of `op` and returns both the sum and the overflow bit. `op` is expected to be
+/// either one of `mir::BinOp::AddWithOverflow` and `mir::BinOp::SubWithOverflow`.
+fn carrying_add<'tcx>(
+    this: &mut crate::MiriInterpCx<'tcx>,
+    cb_in: &OpTy<'tcx>,
+    a: &OpTy<'tcx>,
+    b: &OpTy<'tcx>,
+    op: mir::BinOp,
+) -> InterpResult<'tcx, (ImmTy<'tcx>, Scalar)> {
+    assert!(op == mir::BinOp::AddWithOverflow || op == mir::BinOp::SubWithOverflow);
+
+    let cb_in = this.read_scalar(cb_in)?.to_u8()? != 0;
+    let a = this.read_immediate(a)?;
+    let b = this.read_immediate(b)?;
+
+    let (sum, overflow1) = this.binary_op(op, &a, &b)?.to_pair(this);
+    let (sum, overflow2) =
+        this.binary_op(op, &sum, &ImmTy::from_uint(cb_in, a.layout))?.to_pair(this);
+    let cb_out = overflow1.to_scalar().to_bool()? | overflow2.to_scalar().to_bool()?;
+
+    Ok((sum, Scalar::from_u8(cb_out.into())))
+}
diff --git a/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-adx.rs b/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-adx.rs
@@ -0,0 +1,70 @@
+// Ignore everything except x86 and x86_64
+// Any new targets that are added to CI should be ignored here.
+// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
+//@ignore-target-aarch64
+//@ignore-target-arm
+//@ignore-target-avr
+//@ignore-target-s390x
+//@ignore-target-thumbv7em
+//@ignore-target-wasm32
+//@compile-flags: -C target-feature=+adx
+
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+mod x86 {
+    #[cfg(target_arch = "x86")]
+    use core::arch::x86 as arch;
+    #[cfg(target_arch = "x86_64")]
+    use core::arch::x86_64 as arch;
+
+    fn adc(c_in: u8, a: u32, b: u32) -> (u8, u32) {
+        let mut sum = 0;
+        // SAFETY: There are no safety requirements for calling `_addcarry_u32`.
+        // It's just unsafe for API consistency with other intrinsics.
+        let c_out = unsafe { arch::_addcarryx_u32(c_in, a, b, &mut sum) };
+        (c_out, sum)
+    }
+
+    pub fn main() {
+        assert_eq!(adc(0, 1, 1), (0, 2));
+        assert_eq!(adc(1, 1, 1), (0, 3));
+        assert_eq!(adc(2, 1, 1), (0, 3)); // any non-zero carry acts as 1!
+        assert_eq!(adc(u8::MAX, 1, 1), (0, 3));
+        assert_eq!(adc(0, u32::MAX, u32::MAX), (1, u32::MAX - 1));
+        assert_eq!(adc(1, u32::MAX, u32::MAX), (1, u32::MAX));
+        assert_eq!(adc(2, u32::MAX, u32::MAX), (1, u32::MAX));
+        assert_eq!(adc(u8::MAX, u32::MAX, u32::MAX), (1, u32::MAX));
+    }
+}
+
+#[cfg(target_arch = "x86_64")]
+mod x86_64 {
+    use core::arch::x86_64 as arch;
+
+    fn adc(c_in: u8, a: u64, b: u64) -> (u8, u64) {
+        let mut sum = 0;
+        // SAFETY: There are no safety requirements for calling `_addcarry_u64`.
+        // It's just unsafe for API consistency with other intrinsics.
+        let c_out = unsafe { arch::_addcarryx_u64(c_in, a, b, &mut sum) };
+        (c_out, sum)
+    }
+
+    pub fn main() {
+        assert_eq!(adc(0, 1, 1), (0, 2));
+        assert_eq!(adc(1, 1, 1), (0, 3));
+        assert_eq!(adc(2, 1, 1), (0, 3)); // any non-zero carry acts as 1!
+        assert_eq!(adc(u8::MAX, 1, 1), (0, 3));
+        assert_eq!(adc(0, u64::MAX, u64::MAX), (1, u64::MAX - 1));
+        assert_eq!(adc(1, u64::MAX, u64::MAX), (1, u64::MAX));
+        assert_eq!(adc(2, u64::MAX, u64::MAX), (1, u64::MAX));
+        assert_eq!(adc(u8::MAX, u64::MAX, u64::MAX), (1, u64::MAX));
+    }
+}
+
+fn main() {
+    assert!(is_x86_feature_detected!("adx"));
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    x86::main();
+    #[cfg(target_arch = "x86_64")]
+    x86_64::main();
+}