-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Add out-of-line-atomics support to GlobalISel #74588
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This patch implements the GlobalISel counterpart to commit 4d7df43.
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Author: Thomas Preud'homme (RoboTux) Changes: This patch implements the GlobalISel counterpart to commit 4d7df43. Patch is 363.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74588.diff 9 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 045fc78218dae..186937e597c5b 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -765,6 +765,166 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
return LegalizerHelper::Legalized;
}
+static RTLIB::Libcall
+getOutlineAtomicLibcall(unsigned Opc, AtomicOrdering Order, uint64_t MemSize) {
+ unsigned ModeN, ModelN;
+ switch (MemSize) {
+ case 1:
+ ModeN = 0;
+ break;
+ case 2:
+ ModeN = 1;
+ break;
+ case 4:
+ ModeN = 2;
+ break;
+ case 8:
+ ModeN = 3;
+ break;
+ case 16:
+ ModeN = 4;
+ break;
+ default:
+ return RTLIB::UNKNOWN_LIBCALL;
+ }
+
+ switch (Order) {
+ case AtomicOrdering::Monotonic:
+ ModelN = 0;
+ break;
+ case AtomicOrdering::Acquire:
+ ModelN = 1;
+ break;
+ case AtomicOrdering::Release:
+ ModelN = 2;
+ break;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ ModelN = 3;
+ break;
+ default:
+ return RTLIB::UNKNOWN_LIBCALL;
+ }
+
+#define LCALLS(A, B) \
+ { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
+#define LCALL5(A) \
+ LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
+ switch (Opc) {
+ case TargetOpcode::G_ATOMIC_CMPXCHG:
+ case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
+ const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
+ return LC[ModeN][ModelN];
+ }
+ case TargetOpcode::G_ATOMICRMW_XCHG: {
+ const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
+ return LC[ModeN][ModelN];
+ }
+ case TargetOpcode::G_ATOMICRMW_ADD: {
+ const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
+ return LC[ModeN][ModelN];
+ }
+ case TargetOpcode::G_ATOMICRMW_AND: {
+ const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
+ return LC[ModeN][ModelN];
+ }
+ case TargetOpcode::G_ATOMICRMW_OR: {
+ const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
+ return LC[ModeN][ModelN];
+ }
+ case TargetOpcode::G_ATOMICRMW_XOR: {
+ const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
+ return LC[ModeN][ModelN];
+ }
+ default:
+ return RTLIB::UNKNOWN_LIBCALL;
+ }
+#undef LCALLS
+#undef LCALL5
+}
+
+static LegalizerHelper::LegalizeResult
+createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
+ auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
+ MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+
+ // Add all the args, except for the last which is an imm denoting 'tail'.
+ // const CallLowering::ArgInfo &Result,
+ // Operand 0 & 1 are return: 0 is old val, 1 is success, 2-4 are reg operands:
+ // 2 is ptr, 3 is expected, 4 is new
+ Type *RetTy;
+ SmallVector<Register> RetRegs;
+ SmallVector<CallLowering::ArgInfo, 3> Args;
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case TargetOpcode::G_ATOMIC_CMPXCHG:
+ case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
+ Register Success;
+ LLT SuccessLLT;
+ auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
+ MI.getFirst4RegLLTs();
+ RetRegs.push_back(Ret);
+ RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
+ if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
+ std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
+ NewLLT) = MI.getFirst5RegLLTs();
+ RetRegs.push_back(Success);
+ RetTy = StructType::get(
+ Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
+ }
+ Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
+ Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
+ Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
+ break;
+ }
+ case TargetOpcode::G_ATOMICRMW_XCHG:
+ case TargetOpcode::G_ATOMICRMW_ADD:
+ case TargetOpcode::G_ATOMICRMW_AND:
+ case TargetOpcode::G_ATOMICRMW_OR:
+ case TargetOpcode::G_ATOMICRMW_XOR: {
+ auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
+ RetRegs.push_back(Ret);
+ RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
+ if (Opc == TargetOpcode::G_ATOMICRMW_AND) {
+ Register Tmp = MRI.createGenericVirtualRegister(ValLLT);
+ MIRBuilder.buildXor(Tmp, MIRBuilder.buildConstant(ValLLT, -1), Val);
+ Val = Tmp;
+ }
+ Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
+ Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
+ break;
+ }
+ default:
+ llvm_unreachable("unsupported opcode");
+ }
+
+ auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ auto &AtomicMI = cast<GMemOperation>(MI);
+ auto Ordering = AtomicMI.getMMO().getMergedOrdering();
+ uint64_t MemSize = AtomicMI.getMemSize();
+ RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(Opc, Ordering, MemSize);
+ const char *Name = TLI.getLibcallName(RTLibcall);
+
+ // Unsupported libcall on the target.
+ if (!Name) {
+ LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
+ << MIRBuilder.getTII().getName(Opc) << "\n");
+ return LegalizerHelper::UnableToLegalize;
+ }
+
+ CallLowering::CallLoweringInfo Info;
+ Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
+ Info.Callee = MachineOperand::CreateES(Name);
+ Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
+
+ std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
+ if (!CLI.lowerCall(MIRBuilder, Info))
+ return LegalizerHelper::UnableToLegalize;
+
+ return LegalizerHelper::Legalized;
+}
+
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
Type *FromType) {
auto ToMVT = MVT::getVT(ToType);
@@ -1020,6 +1180,18 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
return Status;
break;
}
+ case TargetOpcode::G_ATOMICRMW_XCHG:
+ case TargetOpcode::G_ATOMICRMW_ADD:
+ case TargetOpcode::G_ATOMICRMW_AND:
+ case TargetOpcode::G_ATOMICRMW_OR:
+ case TargetOpcode::G_ATOMICRMW_XOR:
+ case TargetOpcode::G_ATOMIC_CMPXCHG:
+ case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
+ auto Status = createAtomicLibcall(MIRBuilder, MI);
+ if (Status != Legalized)
+ return Status;
+ break;
+ }
case TargetOpcode::G_BZERO:
case TargetOpcode::G_MEMCPY:
case TargetOpcode::G_MEMMOVE:
@@ -3793,6 +3965,17 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerTRUNC(MI);
GISEL_VECREDUCE_CASES_NONSEQ
return lowerVectorReduction(MI);
+ case G_ATOMICRMW_SUB: {
+ auto Val = MI.getOperand(2).getReg();
+ LLT ValLLT = MRI.getType(Val);
+ Register Tmp = MRI.createGenericVirtualRegister(ValLLT);
+ MIRBuilder.buildSub(Tmp, MIRBuilder.buildConstant(ValLLT, 0), Val);
+ auto [Ret, Mem] = MI.getFirst2Regs();
+ auto &MMO = cast<GMemOperation>(MI).getMMO();
+ MIRBuilder.buildAtomicRMWAdd(Ret, Mem, Tmp, MMO);
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 21a412e9360dc..7fce3e501db57 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -758,16 +758,39 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(2, p0)));
getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
+ .libcallIf([&ST](const LegalityQuery &Query) {
+ return ST.outlineAtomics() && !ST.hasLSE();
+ })
.customIf([](const LegalityQuery &Query) {
return Query.Types[0].getSizeInBits() == 128;
})
.clampScalar(0, s32, s64)
.legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
+ getActionDefinitionsBuilder({G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD,
+ G_ATOMICRMW_AND, G_ATOMICRMW_OR,
+ G_ATOMICRMW_XOR})
+ .libcallIf([&ST](const LegalityQuery &Query) {
+ return ST.outlineAtomics() && !ST.hasLSE();
+ })
+ .clampScalar(0, s32, s64)
+ .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
+
+ getActionDefinitionsBuilder(G_ATOMICRMW_SUB)
+ .lowerIf([&ST](const LegalityQuery &Query) {
+ return ST.outlineAtomics() && !ST.hasLSE();
+ })
+ .clampScalar(0, s32, s64)
+ .legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
+
+ // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
+ // Don't outline them unless
+ // (1) high level <atomic> support approved:
+ // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
+ // (2) low level libgcc and compiler-rt support implemented by:
+ // min/max outline atomics helpers
getActionDefinitionsBuilder(
- {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND,
- G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX,
- G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
+ {G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX})
.clampScalar(0, s32, s64)
.legalIf(all(typeInSet(0, {s32, s64}), typeIs(1, p0)));
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-outline_atomics.ll
index fb4bef33d9b4f..fccafb29addbc 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-outline_atomics.ll
@@ -229,11 +229,7 @@ define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_unordered:
-; -O0: ldxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_relax
;
; -O1-LABEL: load_atomic_i128_aligned_unordered:
; -O1: ldxp x0, x1, [x8]
@@ -244,11 +240,7 @@ define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_unordered_const:
-; -O0: ldxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_relax
;
; -O1-LABEL: load_atomic_i128_aligned_unordered_const:
; -O1: ldxp x0, x1, [x8]
@@ -259,11 +251,7 @@ define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %pt
define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_monotonic:
-; -O0: ldxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_relax
;
; -O1-LABEL: load_atomic_i128_aligned_monotonic:
; -O1: ldxp x0, x1, [x8]
@@ -274,11 +262,7 @@ define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_monotonic_const:
-; -O0: ldxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_relax
;
; -O1-LABEL: load_atomic_i128_aligned_monotonic_const:
; -O1: ldxp x0, x1, [x8]
@@ -289,11 +273,7 @@ define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %pt
define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_acquire:
-; -O0: ldaxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_acq
;
; -O1-LABEL: load_atomic_i128_aligned_acquire:
; -O1: ldaxp x0, x1, [x8]
@@ -304,11 +284,7 @@ define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_acquire_const:
-; -O0: ldaxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_acq
;
; -O1-LABEL: load_atomic_i128_aligned_acquire_const:
; -O1: ldaxp x0, x1, [x8]
@@ -319,11 +295,7 @@ define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr)
define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_seq_cst:
-; -O0: ldaxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x10, x10, [x9]
-; -O0: stlxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_acq_rel
;
; -O1-LABEL: load_atomic_i128_aligned_seq_cst:
; -O1: ldaxp x0, x1, [x8]
@@ -334,11 +306,7 @@ define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
; -O0-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; -O0: ldaxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x10, x10, [x9]
-; -O0: stlxp w8, x0, x1, [x9]
+; -O0: bl __aarch64_cas16_acq_rel
;
; -O1-LABEL: load_atomic_i128_aligned_seq_cst_const:
; -O1: ldaxp x0, x1, [x8]
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-outline_atomics.ll
index 3d204b734d4a0..e594561010464 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-outline_atomics.ll
@@ -117,14 +117,10 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_unordered:
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: store_atomic_i128_aligned_unordered:
@@ -136,14 +132,10 @@ define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_monotonic:
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: store_atomic_i128_aligned_monotonic:
@@ -155,14 +147,10 @@ define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_release:
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: store_atomic_i128_aligned_release:
@@ -174,14 +162,10 @@ define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr)
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
index c660c139e35d4..e9b096e8c6c44 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
@@ -145,14 +145,10 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_relax
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
@@ -164,14 +160,10 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %val
define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_acquire:
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_acq
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
@@ -183,14 +175,10 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_release:
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
@@ -202,14 +190,10 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
+; -O0: bl __aarch64_cas16_acq_rel
+; -O0: eor x8, x0, x8
+; -O0: eor x9, x1, x9
+; -O0: orr x8, x8, x9
; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
@@ -221,14 +205,10 @@ define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value
define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall outline atomics part looks good, for global-isel probably @davemgreen can provide some input. Thank you for bringing outline atomics there!
return LC[ModeN][ModelN];
}
case TargetOpcode::G_ATOMICRMW_ADD: {
const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tests for ldadd4, ldadd8 outline atomics would be great to add.
✅ With the latest revision this PR passed the C/C++ code formatter.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not an expert on atomics, but why would we have a libcall for -O0 but not for O1 in the tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are pointer type xchg / cmpxchg already tested?
I looked at it for the u?(min|max) and it seemed to boil down to the atomic expand pass being run at -O1 and above.
No sorry, it's not that it's only run at -O1 and above, it's that the output is different. At -O0 it keeps the cmpxchg, whereas at -O1 it changes the cmpxchg into ldxr + stlxr intrinsics.
That explains why -O0 differs from -O1 for nand and u?(min|max)
This sounds extremely unsound
It is completely unsound. The constraints that approximately every architecture has on LL/SC loops make it unsound to ever generate isolated LL and SC instructions at the IR level, as we are currently doing on AArch64, ARM, and Hexagon. We already added the infrastructure to do this properly, and used it for RISCV, and it's now also being used on Loongarch. ARM/AArch64 should be updated to match. See discussion in https://lists.llvm.org/pipermail/llvm-dev/2018-June/123993.html
Just to clarify, is the expectation for me to solve this in this patch, or can this be done in a separate patch? Note that this is existing code exercised when compiling at -O1 or above (i.e. when not using GlobalISel) for some of the atomics. Best regards,
This should be separate; it's an unrelated issue
My apologies @arsenm, I had missed your review comments. All fixed now.
Thanks for fixing the unused variable @DamonFool — I was about to revert and push a new patch.
This patch implements the GlobalISel counterpart to
commit 4d7df43.