-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Clang][CodeGen] Optimised LLVM IR for atomic increments/decrements on floats #89362
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Krishna Narayanan (Krishna-13-cyber) ChangesAddresses #53079 Patch is 48.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/89362.diff 3 Files Affected:
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 1f18e0d5ba409a..e97bb5c7e9dd16 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2792,6 +2792,19 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
llvm::AtomicOrdering::SequentiallyConsistent);
return isPre ? Builder.CreateBinOp(op, old, amt) : old;
}
+ // Special case for atomic increment/decrement on floats
+ if (type->isFloatingType()) {
+ llvm::AtomicRMWInst::BinOp aop =
+ isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub;
+ llvm::Instruction::BinaryOps op =
+ isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
+ llvm::Value *amt = llvm::ConstantFP::get(
+ VMContext, llvm::APFloat(static_cast<float>(amount)));
+ llvm::Value *old =
+ Builder.CreateAtomicRMW(aop, LV.getAddress(CGF), amt,
+ llvm::AtomicOrdering::SequentiallyConsistent);
+ return isPre ? Builder.CreateBinOp(op, old, amt) : old;
+ }
value = EmitLoadOfLValue(LV, E->getExprLoc());
input = value;
// For every other atomic operation, we need to emit a load-op-cmpxchg loop
diff --git a/clang/test/CodeGen/X86/x86-atomic-float.c b/clang/test/CodeGen/X86/x86-atomic-float.c
new file mode 100644
index 00000000000000..89a2605ed44461
--- /dev/null
+++ b/clang/test/CodeGen/X86/x86-atomic-float.c
@@ -0,0 +1,97 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck -check-prefix=CHECK32 %s
+
+// CHECK-LABEL: define dso_local i32 @test_int_inc(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw add ptr @test_int_inc.n, i32 1 seq_cst, align 4
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+// CHECK32-LABEL: define dso_local i32 @test_int_inc(
+// CHECK32-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK32-NEXT: entry:
+// CHECK32-NEXT: [[TMP0:%.*]] = atomicrmw add ptr @test_int_inc.n, i32 1 seq_cst, align 4
+// CHECK32-NEXT: ret i32 [[TMP0]]
+//
+int test_int_inc()
+{
+ static _Atomic int n;
+ return n++;
+}
+
+// CHECK-LABEL: define dso_local float @test_float_post_inc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_float_post_inc.n, float 1.000000e+00 seq_cst, align 4
+// CHECK-NEXT: ret float [[TMP0]]
+//
+// CHECK32-LABEL: define dso_local float @test_float_post_inc(
+// CHECK32-SAME: ) #[[ATTR0]] {
+// CHECK32-NEXT: entry:
+// CHECK32-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_float_post_inc.n, float 1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT: ret float [[TMP0]]
+//
+float test_float_post_inc()
+{
+ static _Atomic float n;
+ return n++;
+}
+
+// CHECK-LABEL: define dso_local float @test_float_post_dc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_float_post_dc.n, float -1.000000e+00 seq_cst, align 4
+// CHECK-NEXT: ret float [[TMP0]]
+//
+// CHECK32-LABEL: define dso_local float @test_float_post_dc(
+// CHECK32-SAME: ) #[[ATTR0]] {
+// CHECK32-NEXT: entry:
+// CHECK32-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_float_post_dc.n, float -1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT: ret float [[TMP0]]
+//
+float test_float_post_dc()
+{
+ static _Atomic float n;
+ return n--;
+}
+
+// CHECK-LABEL: define dso_local float @test_float_pre_dc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_float_pre_dc.n, float -1.000000e+00 seq_cst, align 4
+// CHECK-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], -1.000000e+00
+// CHECK-NEXT: ret float [[TMP1]]
+//
+// CHECK32-LABEL: define dso_local float @test_float_pre_dc(
+// CHECK32-SAME: ) #[[ATTR0]] {
+// CHECK32-NEXT: entry:
+// CHECK32-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_float_pre_dc.n, float -1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], -1.000000e+00
+// CHECK32-NEXT: ret float [[TMP1]]
+//
+float test_float_pre_dc()
+{
+ static _Atomic float n;
+ return --n;
+}
+
+// CHECK-LABEL: define dso_local float @test_float_pre_inc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_float_pre_inc.n, float 1.000000e+00 seq_cst, align 4
+// CHECK-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// CHECK-NEXT: ret float [[TMP1]]
+//
+// CHECK32-LABEL: define dso_local float @test_float_pre_inc(
+// CHECK32-SAME: ) #[[ATTR0]] {
+// CHECK32-NEXT: entry:
+// CHECK32-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_float_pre_inc.n, float 1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// CHECK32-NEXT: ret float [[TMP1]]
+//
+float test_float_pre_inc()
+{
+ static _Atomic float n;
+ return ++n;
+}
diff --git a/clang/test/CodeGen/X86/x86-atomic-long_double.c b/clang/test/CodeGen/X86/x86-atomic-long_double.c
index ca3fb6730fb640..b7f393f51c791e 100644
--- a/clang/test/CodeGen/X86/x86-atomic-long_double.c
+++ b/clang/test/CodeGen/X86/x86-atomic-long_double.c
@@ -1,351 +1,342 @@
// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck -check-prefix=CHECK32 %s
+// CHECK-LABEL: define dso_local x86_fp80 @testinc(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16
+// CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00
+// CHECK-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret x86_fp80 [[TMP3]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @testinc(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK32-NEXT: entry:
+// CHECK32-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00
+// CHECK32-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 4
+// CHECK32-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4
+// CHECK32-NEXT: ret x86_fp80 [[TMP3]]
+//
long double testinc(_Atomic long double *addr) {
- // CHECK-LABEL: @testinc
- // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
- // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
- // CHECK: [[INT_VALUE:%.+]] = load atomic i128, ptr [[ADDR]] seq_cst, align 16
- // CHECK: store i128 [[INT_VALUE]], ptr [[LD_ADDR:%.+]], align 16
- // CHECK: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[LD_ADDR]], align 16
- // CHECK: br label %[[ATOMIC_OP:.+]]
- // CHECK: [[ATOMIC_OP]]
- // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
- // CHECK: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
- // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
- // CHECK: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 16
- // CHECK: [[OLD_INT:%.+]] = load i128, ptr [[OLD_VALUE_ADDR]], align 16
- // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[NEW_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
- // CHECK: store x86_fp80 [[INC_VALUE]], ptr [[NEW_VALUE_ADDR]], align 16
- // CHECK: [[NEW_INT:%.+]] = load i128, ptr [[NEW_VALUE_ADDR]], align 16
- // CHECK: [[RES:%.+]] = cmpxchg ptr [[ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
- // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
- // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
- // CHECK: store i128 [[OLD_VALUE]], ptr [[OLD_VALUE_RES_PTR:%.+]], align 16
- // CHECK: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_RES_PTR]], align 16
- // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
- // CHECK: [[ATOMIC_CONT]]
- // CHECK: ret x86_fp80 [[INC_VALUE]]
- // CHECK32-LABEL: @testinc
- // CHECK32: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 4
- // CHECK32: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 4
- // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[TEMP_LD_ADDR:%.+]], i32 noundef 5)
- // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[TEMP_LD_ADDR]], align 4
- // CHECK32: br label %[[ATOMIC_OP:.+]]
- // CHECK32: [[ATOMIC_OP]]
- // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
- // CHECK32: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
- // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
- // CHECK32: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 4
- // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[DESIRED_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
- // CHECK32: store x86_fp80 [[INC_VALUE]], ptr [[DESIRED_VALUE_ADDR]], align 4
- // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[OLD_VALUE_ADDR]], ptr noundef [[DESIRED_VALUE_ADDR]], i32 noundef 5, i32 noundef 5)
- // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[OLD_VALUE_ADDR]], align 4
- // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
- // CHECK32: [[ATOMIC_CONT]]
- // CHECK32: ret x86_fp80 [[INC_VALUE]]
return ++*addr;
}
+// CHECK-LABEL: define dso_local x86_fp80 @testdec(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float -1.000000e+00 seq_cst, align 16
+// CHECK-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
+// CHECK-NEXT: ret x86_fp80 [[TMP2]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @testdec(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK32-NEXT: entry:
+// CHECK32-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float -1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 4
+// CHECK32-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4
+// CHECK32-NEXT: ret x86_fp80 [[TMP2]]
+//
long double testdec(_Atomic long double *addr) {
- // CHECK-LABEL: @testdec
- // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
- // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
- // CHECK: [[INT_VALUE:%.+]] = load atomic i128, ptr [[ADDR]] seq_cst, align 16
- // CHECK: store i128 [[INT_VALUE]], ptr [[LD_ADDR:%.+]], align 16
- // CHECK: [[ORIG_LD_VALUE:%.+]] = load x86_fp80, ptr [[LD_ADDR]], align 16
- // CHECK: br label %[[ATOMIC_OP:.+]]
- // CHECK: [[ATOMIC_OP]]
- // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
- // CHECK: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
- // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
- // CHECK: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 16
- // CHECK: [[OLD_INT:%.+]] = load i128, ptr [[OLD_VALUE_ADDR]], align 16
- // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[NEW_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
- // CHECK: store x86_fp80 [[DEC_VALUE]], ptr [[NEW_VALUE_ADDR]], align 16
- // CHECK: [[NEW_INT:%.+]] = load i128, ptr [[NEW_VALUE_ADDR]], align 16
- // CHECK: [[RES:%.+]] = cmpxchg ptr [[ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
- // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
- // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
- // CHECK: store i128 [[OLD_VALUE]], ptr [[OLD_VALUE_RES_PTR:%.+]], align 16
- // CHECK: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_RES_PTR]], align 16
- // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
- // CHECK: [[ATOMIC_CONT]]
- // CHECK: ret x86_fp80 [[ORIG_LD_VALUE]]
- // CHECK32-LABEL: @testdec
- // CHECK32: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 4
- // CHECK32: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 4
- // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[TEMP_LD_ADDR:%.+]], i32 noundef 5)
- // CHECK32: [[ORIG_LD_VALUE:%.+]] = load x86_fp80, ptr [[TEMP_LD_ADDR]], align 4
- // CHECK32: br label %[[ATOMIC_OP:.+]]
- // CHECK32: [[ATOMIC_OP]]
- // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
- // CHECK32: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
- // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
- // CHECK32: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 4
- // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[DESIRED_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
- // CHECK32: store x86_fp80 [[DEC_VALUE]], ptr [[DESIRED_VALUE_ADDR]], align 4
- // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[OLD_VALUE_ADDR]], ptr noundef [[DESIRED_VALUE_ADDR]], i32 noundef 5, i32 noundef 5)
- // CHECK32: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_ADDR]], align 4
- // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
- // CHECK32: [[ATOMIC_CONT]]
- // CHECK32: ret x86_fp80 [[ORIG_LD_VALUE]]
return (*addr)--;
}
+// CHECK-LABEL: define dso_local x86_fp80 @testcompassign(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: [[ATOMIC_TEMP5:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr [[TMP0]] seq_cst, align 16
+// CHECK-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16
+// CHECK-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16
+// CHECK-NEXT: br label [[ATOMIC_OP:%.*]]
+// CHECK: atomic_op:
+// CHECK-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[ATOMIC_OP]] ]
+// CHECK-NEXT: [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000
+// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false)
+// CHECK-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16
+// CHECK-NEXT: [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16
+// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false)
+// CHECK-NEXT: store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 16
+// CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16
+// CHECK-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0
+// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1
+// CHECK-NEXT: store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16
+// CHECK-NEXT: [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16
+// CHECK-NEXT: br i1 [[TMP7]], label [[ATOMIC_CONT:%.*]], label [[ATOMIC_OP]]
+// CHECK: atomic_cont:
+// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT: [[ATOMIC_LOAD4:%.*]] = load atomic i128, ptr [[TMP9]] seq_cst, align 16
+// CHECK-NEXT: store i128 [[ATOMIC_LOAD4]], ptr [[ATOMIC_TEMP5]], align 16
+// CHECK-NEXT: [[TMP10:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP5]], align 16
+// CHECK-NEXT: ret x86_fp80 [[TMP10]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @testcompassign(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK32-NEXT: entry:
+// CHECK32-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK32-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4
+// CHECK32-NEXT: br label [[ATOMIC_OP:%.*]]
+// CHECK32: atomic_op:
+// CHECK32-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[ATOMIC_OP]] ]
+// CHECK32-NEXT: [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000
+// CHECK32-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false)
+// CHECK32-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4
+// CHECK32-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false)
+// CHECK32-NEXT: store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 4
+// CHECK32-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5)
+// CHECK32-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
+// CHECK32-NEXT: br i1 [[CALL]], label [[ATOMIC_CONT:%.*]], label [[ATOMIC_OP]]
+// CHECK32: atomic_cont:
+// CHECK32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5)
+// CHECK32-NEXT: [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4
+// CHECK32-NEXT: ret x86_fp80 [[TMP5]]
+//
long double testcompassign(_Atomic long double *addr) {
*addr -= 25;
- // CHECK-LABEL: @testcompassign
- // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
- // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
- // CHECK: [[INT_VALUE:%.+]] = load atomic i128, ptr [[ADDR]] seq_cst, align 16
- // CHECK: store i128 [[INT_VALUE]], ptr [[LD_ADDR:%.+]], align 16
- // CHECK: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[LD_ADDR]], align 16
- // CHECK: br label %[[ATOMIC_OP:.+]]
- // CHECK: [[ATOMIC_OP]]
- // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[...
[truncated]
|
Hi @RKSimon, any more changes required for this!? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Thanks! |
Fixes #53079