Skip to content

[Clang][CodeGen] Optimised LLVM IR for atomic increments/decrements on floats #89362

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 2, 2024

Conversation

Krishna-13-cyber
Copy link
Contributor

@Krishna-13-cyber Krishna-13-cyber commented Apr 19, 2024

Fixes #53079

@Krishna-13-cyber Krishna-13-cyber marked this pull request as draft April 19, 2024 09:44
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:codegen IR generation bugs: mangling, exceptions, etc. labels Apr 19, 2024
@llvmbot
Copy link
Member

llvmbot commented Apr 19, 2024

@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-clang-codegen

@llvm/pr-subscribers-clang

Author: Krishna Narayanan (Krishna-13-cyber)

Changes

Addresses #53079


Patch is 48.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/89362.diff

3 Files Affected:

  • (modified) clang/lib/CodeGen/CGExprScalar.cpp (+13)
  • (added) clang/test/CodeGen/X86/x86-atomic-float.c (+97)
  • (modified) clang/test/CodeGen/X86/x86-atomic-long_double.c (+300-309)
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 1f18e0d5ba409a..e97bb5c7e9dd16 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2792,6 +2792,19 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
                                   llvm::AtomicOrdering::SequentiallyConsistent);
       return isPre ? Builder.CreateBinOp(op, old, amt) : old;
     }
+    // Special case for atomic increment/decrement on floats
+    if (type->isFloatingType()) {
+      llvm::AtomicRMWInst::BinOp aop =
+          isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub;
+      llvm::Instruction::BinaryOps op =
+          isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
+      llvm::Value *amt = llvm::ConstantFP::get(
+          VMContext, llvm::APFloat(static_cast<float>(amount)));
+      llvm::Value *old =
+          Builder.CreateAtomicRMW(aop, LV.getAddress(CGF), amt,
+                                  llvm::AtomicOrdering::SequentiallyConsistent);
+      return isPre ? Builder.CreateBinOp(op, old, amt) : old;
+    }
     value = EmitLoadOfLValue(LV, E->getExprLoc());
     input = value;
     // For every other atomic operation, we need to emit a load-op-cmpxchg loop
diff --git a/clang/test/CodeGen/X86/x86-atomic-float.c b/clang/test/CodeGen/X86/x86-atomic-float.c
new file mode 100644
index 00000000000000..89a2605ed44461
--- /dev/null
+++ b/clang/test/CodeGen/X86/x86-atomic-float.c
@@ -0,0 +1,97 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck -check-prefix=CHECK32 %s
+
+// CHECK-LABEL: define dso_local i32 @test_int_inc(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw add ptr @test_int_inc.n, i32 1 seq_cst, align 4
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+// CHECK32-LABEL: define dso_local i32 @test_int_inc(
+// CHECK32-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[TMP0:%.*]] = atomicrmw add ptr @test_int_inc.n, i32 1 seq_cst, align 4
+// CHECK32-NEXT:    ret i32 [[TMP0]]
+//
+int test_int_inc()
+{
+    static _Atomic int n;
+    return n++;
+}
+
+// CHECK-LABEL: define dso_local float @test_float_post_inc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_float_post_inc.n, float 1.000000e+00 seq_cst, align 4
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+// CHECK32-LABEL: define dso_local float @test_float_post_inc(
+// CHECK32-SAME: ) #[[ATTR0]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_float_post_inc.n, float 1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT:    ret float [[TMP0]]
+//
+float test_float_post_inc()
+{
+    static _Atomic float n;
+    return n++;
+}
+
+// CHECK-LABEL: define dso_local float @test_float_post_dc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_float_post_dc.n, float -1.000000e+00 seq_cst, align 4
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+// CHECK32-LABEL: define dso_local float @test_float_post_dc(
+// CHECK32-SAME: ) #[[ATTR0]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_float_post_dc.n, float -1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT:    ret float [[TMP0]]
+//
+float test_float_post_dc()
+{
+    static _Atomic float n;
+    return n--;
+}
+
+// CHECK-LABEL: define dso_local float @test_float_pre_dc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_float_pre_dc.n, float -1.000000e+00 seq_cst, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = fsub float [[TMP0]], -1.000000e+00
+// CHECK-NEXT:    ret float [[TMP1]]
+//
+// CHECK32-LABEL: define dso_local float @test_float_pre_dc(
+// CHECK32-SAME: ) #[[ATTR0]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr @test_float_pre_dc.n, float -1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT:    [[TMP1:%.*]] = fsub float [[TMP0]], -1.000000e+00
+// CHECK32-NEXT:    ret float [[TMP1]]
+//
+float test_float_pre_dc()
+{
+    static _Atomic float n;
+    return --n;
+}
+
+// CHECK-LABEL: define dso_local float @test_float_pre_inc(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_float_pre_inc.n, float 1.000000e+00 seq_cst, align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// CHECK-NEXT:    ret float [[TMP1]]
+//
+// CHECK32-LABEL: define dso_local float @test_float_pre_inc(
+// CHECK32-SAME: ) #[[ATTR0]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr @test_float_pre_inc.n, float 1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT:    [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// CHECK32-NEXT:    ret float [[TMP1]]
+//
+float test_float_pre_inc()
+{
+    static _Atomic float n;
+    return ++n;
+}
diff --git a/clang/test/CodeGen/X86/x86-atomic-long_double.c b/clang/test/CodeGen/X86/x86-atomic-long_double.c
index ca3fb6730fb640..b7f393f51c791e 100644
--- a/clang/test/CodeGen/X86/x86-atomic-long_double.c
+++ b/clang/test/CodeGen/X86/x86-atomic-long_double.c
@@ -1,351 +1,342 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck -check-prefix=CHECK32 %s
 
+// CHECK-LABEL: define dso_local x86_fp80 @testinc(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00
+// CHECK-NEXT:    store float [[TMP2]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret x86_fp80 [[TMP3]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @testinc(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT:    [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00
+// CHECK32-NEXT:    store float [[TMP2]], ptr [[RETVAL]], align 4
+// CHECK32-NEXT:    [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4
+// CHECK32-NEXT:    ret x86_fp80 [[TMP3]]
+//
 long double testinc(_Atomic long double *addr) {
-  // CHECK-LABEL: @testinc
-  // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
-  // CHECK: [[INT_VALUE:%.+]] = load atomic i128, ptr [[ADDR]] seq_cst, align 16
-  // CHECK: store i128 [[INT_VALUE]], ptr [[LD_ADDR:%.+]], align 16
-  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[LD_ADDR]], align 16
-  // CHECK: br label %[[ATOMIC_OP:.+]]
-  // CHECK: [[ATOMIC_OP]]
-  // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: [[OLD_INT:%.+]] = load i128, ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[NEW_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[INC_VALUE]], ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[NEW_INT:%.+]] = load i128, ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[RES:%.+]] = cmpxchg ptr [[ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
-  // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
-  // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
-  // CHECK: store i128 [[OLD_VALUE]], ptr [[OLD_VALUE_RES_PTR:%.+]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_RES_PTR]], align 16
-  // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK: [[ATOMIC_CONT]]
-  // CHECK: ret x86_fp80 [[INC_VALUE]]
-  // CHECK32-LABEL: @testinc
-  // CHECK32: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 4
-  // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[TEMP_LD_ADDR:%.+]], i32 noundef 5)
-  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[TEMP_LD_ADDR]], align 4
-  // CHECK32: br label %[[ATOMIC_OP:.+]]
-  // CHECK32: [[ATOMIC_OP]]
-  // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK32: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[DESIRED_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[INC_VALUE]], ptr [[DESIRED_VALUE_ADDR]], align 4
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[OLD_VALUE_ADDR]], ptr noundef [[DESIRED_VALUE_ADDR]], i32 noundef 5, i32 noundef 5)
-  // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK32: [[ATOMIC_CONT]]
-  // CHECK32: ret x86_fp80 [[INC_VALUE]]
 
   return ++*addr;
 }
 
+// CHECK-LABEL: define dso_local x86_fp80 @testdec(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float -1.000000e+00 seq_cst, align 16
+// CHECK-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16
+// CHECK-NEXT:    ret x86_fp80 [[TMP2]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @testdec(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[RETVAL:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float -1.000000e+00 seq_cst, align 4
+// CHECK32-NEXT:    store float [[TMP1]], ptr [[RETVAL]], align 4
+// CHECK32-NEXT:    [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4
+// CHECK32-NEXT:    ret x86_fp80 [[TMP2]]
+//
 long double testdec(_Atomic long double *addr) {
-  // CHECK-LABEL: @testdec
-  // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
-  // CHECK: [[INT_VALUE:%.+]] = load atomic i128, ptr [[ADDR]] seq_cst, align 16
-  // CHECK: store i128 [[INT_VALUE]], ptr [[LD_ADDR:%.+]], align 16
-  // CHECK: [[ORIG_LD_VALUE:%.+]] = load x86_fp80, ptr [[LD_ADDR]], align 16
-  // CHECK: br label %[[ATOMIC_OP:.+]]
-  // CHECK: [[ATOMIC_OP]]
-  // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: [[OLD_INT:%.+]] = load i128, ptr [[OLD_VALUE_ADDR]], align 16
-  // CHECK: call void @llvm.memset.p0.i64(ptr align 16 [[NEW_VALUE_ADDR:%.+]], i8 0, i64 16, i1 false)
-  // CHECK: store x86_fp80 [[DEC_VALUE]], ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[NEW_INT:%.+]] = load i128, ptr [[NEW_VALUE_ADDR]], align 16
-  // CHECK: [[RES:%.+]] = cmpxchg ptr [[ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst, align 16
-  // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0
-  // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1
-  // CHECK: store i128 [[OLD_VALUE]], ptr [[OLD_VALUE_RES_PTR:%.+]], align 16
-  // CHECK: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_RES_PTR]], align 16
-  // CHECK: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK: [[ATOMIC_CONT]]
-  // CHECK: ret x86_fp80 [[ORIG_LD_VALUE]]
-  // CHECK32-LABEL: @testdec
-  // CHECK32: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 4
-  // CHECK32: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 4
-  // CHECK32: call void @__atomic_load(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[TEMP_LD_ADDR:%.+]], i32 noundef 5)
-  // CHECK32: [[ORIG_LD_VALUE:%.+]] = load x86_fp80, ptr [[TEMP_LD_ADDR]], align 4
-  // CHECK32: br label %[[ATOMIC_OP:.+]]
-  // CHECK32: [[ATOMIC_OP]]
-  // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ]
-  // CHECK32: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]],
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[OLD_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[OLD_VALUE]], ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: call void @llvm.memset.p0.i64(ptr align 4 [[DESIRED_VALUE_ADDR:%.+]], i8 0, i64 12, i1 false)
-  // CHECK32: store x86_fp80 [[DEC_VALUE]], ptr [[DESIRED_VALUE_ADDR]], align 4
-  // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[ADDR]], ptr noundef [[OLD_VALUE_ADDR]], ptr noundef [[DESIRED_VALUE_ADDR]], i32 noundef 5, i32 noundef 5)
-  // CHECK32: [[LD_VALUE]] = load x86_fp80, ptr [[OLD_VALUE_ADDR]], align 4
-  // CHECK32: br i1 [[FAIL_SUCCESS]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]]
-  // CHECK32: [[ATOMIC_CONT]]
-  // CHECK32: ret x86_fp80 [[ORIG_LD_VALUE]]
 
   return (*addr)--;
 }
 
+// CHECK-LABEL: define dso_local x86_fp80 @testcompassign(
+// CHECK-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    [[ATOMIC_TEMP5:%.*]] = alloca x86_fp80, align 16
+// CHECK-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr [[TMP0]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16
+// CHECK-NEXT:    br label [[ATOMIC_OP:%.*]]
+// CHECK:       atomic_op:
+// CHECK-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[ATOMIC_OP]] ]
+// CHECK-NEXT:    [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000
+// CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false)
+// CHECK-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16
+// CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false)
+// CHECK-NEXT:    store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0
+// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1
+// CHECK-NEXT:    store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16
+// CHECK-NEXT:    [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16
+// CHECK-NEXT:    br i1 [[TMP7]], label [[ATOMIC_CONT:%.*]], label [[ATOMIC_OP]]
+// CHECK:       atomic_cont:
+// CHECK-NEXT:    [[TMP9:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT:    [[ATOMIC_LOAD4:%.*]] = load atomic i128, ptr [[TMP9]] seq_cst, align 16
+// CHECK-NEXT:    store i128 [[ATOMIC_LOAD4]], ptr [[ATOMIC_TEMP5]], align 16
+// CHECK-NEXT:    [[TMP10:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP5]], align 16
+// CHECK-NEXT:    ret x86_fp80 [[TMP10]]
+//
+// CHECK32-LABEL: define dso_local x86_fp80 @testcompassign(
+// CHECK32-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] {
+// CHECK32-NEXT:  entry:
+// CHECK32-NEXT:    [[ADDR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 4
+// CHECK32-NEXT:    store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5)
+// CHECK32-NEXT:    [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4
+// CHECK32-NEXT:    br label [[ATOMIC_OP:%.*]]
+// CHECK32:       atomic_op:
+// CHECK32-NEXT:    [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[ATOMIC_OP]] ]
+// CHECK32-NEXT:    [[SUB:%.*]] = fsub x86_fp80 [[TMP2]], 0xK4003C800000000000000
+// CHECK32-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false)
+// CHECK32-NEXT:    store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4
+// CHECK32-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false)
+// CHECK32-NEXT:    store x86_fp80 [[SUB]], ptr [[ATOMIC_TEMP2]], align 4
+// CHECK32-NEXT:    [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5)
+// CHECK32-NEXT:    [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4
+// CHECK32-NEXT:    br i1 [[CALL]], label [[ATOMIC_CONT:%.*]], label [[ATOMIC_OP]]
+// CHECK32:       atomic_cont:
+// CHECK32-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4
+// CHECK32-NEXT:    call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef 5)
+// CHECK32-NEXT:    [[TMP5:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 4
+// CHECK32-NEXT:    ret x86_fp80 [[TMP5]]
+//
 long double testcompassign(_Atomic long double *addr) {
   *addr -= 25;
-  // CHECK-LABEL: @testcompassign
-  // CHECK: store ptr %{{.+}}, ptr [[ADDR_ADDR:%.+]], align 8
-  // CHECK: [[ADDR:%.+]] = load ptr, ptr [[ADDR_ADDR]], align 8
-  // CHECK: [[INT_VALUE:%.+]] = load atomic i128, ptr [[ADDR]] seq_cst, align 16
-  // CHECK: store i128 [[INT_VALUE]], ptr [[LD_ADDR:%.+]], align 16
-  // CHECK: [[LD_VALUE:%.+]] = load x86_fp80, ptr [[LD_ADDR]], align 16
-  // CHECK: br label %[[ATOMIC_OP:.+]]
-  // CHECK: [[ATOMIC_OP]]
-  // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[...
[truncated]

@Krishna-13-cyber Krishna-13-cyber marked this pull request as ready for review April 19, 2024 17:11
@Krishna-13-cyber
Copy link
Contributor Author

Hi @RKSimon, any more changes required for this!?

Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@RKSimon RKSimon merged commit f17b1fb into llvm:main May 2, 2024
@Krishna-13-cyber
Copy link
Contributor Author

LGTM

Thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:codegen IR generation bugs: mangling, exceptions, etc. clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[clang] produces incorrect LLVM IR for atomic post increments on floats
3 participants