
Commit b60f801

[LoongArch] Add codegen support for atomicrmw xchg operation on LA64
In order to avoid the patch being too large, the atomicrmw xchg operation on LA32 will be added later.

Differential Revision: https://reviews.llvm.org/D131228
1 parent 250cde6 commit b60f801
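As context (not part of the commit itself), a minimal C++ sketch of the kind of sub-word atomic operation this patch teaches the LA64 backend to lower; the function name is illustrative only. The frontend emits an atomicrmw xchg i16 for it, which the new hooks below expand through a masked intrinsic and, finally, an LL.W/SC.W loop.

#include <atomic>
#include <cstdint>

// Illustration only: a 16-bit exchange. With this commit, the resulting
// atomicrmw xchg i16 is expanded on LA64 via AtomicExpansionKind::MaskedIntrinsic
// instead of being unsupported.
uint16_t swap16(std::atomic<uint16_t> &Slot, uint16_t Desired) {
  return Slot.exchange(Desired, std::memory_order_seq_cst);
}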


11 files changed (+423, -0 lines)


llvm/include/llvm/IR/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@ tablegen(LLVM IntrinsicsARM.h -gen-intrinsic-enums -intrinsic-prefix=arm)
 tablegen(LLVM IntrinsicsBPF.h -gen-intrinsic-enums -intrinsic-prefix=bpf)
 tablegen(LLVM IntrinsicsDirectX.h -gen-intrinsic-enums -intrinsic-prefix=dx)
 tablegen(LLVM IntrinsicsHexagon.h -gen-intrinsic-enums -intrinsic-prefix=hexagon)
+tablegen(LLVM IntrinsicsLoongArch.h -gen-intrinsic-enums -intrinsic-prefix=loongarch)
 tablegen(LLVM IntrinsicsMips.h -gen-intrinsic-enums -intrinsic-prefix=mips)
 tablegen(LLVM IntrinsicsNVPTX.h -gen-intrinsic-enums -intrinsic-prefix=nvvm)
 tablegen(LLVM IntrinsicsPowerPC.h -gen-intrinsic-enums -intrinsic-prefix=ppc)

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 1 addition & 0 deletions
@@ -2068,3 +2068,4 @@ include "llvm/IR/IntrinsicsRISCV.td"
 include "llvm/IR/IntrinsicsSPIRV.td"
 include "llvm/IR/IntrinsicsVE.td"
 include "llvm/IR/IntrinsicsDirectX.td"
+include "llvm/IR/IntrinsicsLoongArch.td"
llvm/include/llvm/IR/IntrinsicsLoongArch.td

Lines changed: 31 additions & 0 deletions

@@ -0,0 +1,31 @@
+//===- IntrinsicsLoongArch.td - Defines LoongArch intrinsics *- tablegen -*===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the LoongArch-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "loongarch" in {
+
+//===----------------------------------------------------------------------===//
+// Atomics
+
+// T @llvm.<name>.T.<p>(any*, T, T, T imm);
+class MaskedAtomicRMW<LLVMType itype>
+    : Intrinsic<[itype], [llvm_anyptr_ty, itype, itype, itype],
+                [IntrArgMemOnly, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<3>>]>;
+
+// We define 32-bit and 64-bit variants of the above, where T stands for i32
+// or i64 respectively:
+multiclass MaskedAtomicRMWIntrinsics {
+  // i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64 imm);
+  def _i64 : MaskedAtomicRMW<llvm_i64_ty>;
+}
+
+defm int_loongarch_masked_atomicrmw_xchg : MaskedAtomicRMWIntrinsics;
+} // TargetPrefix = "loongarch"

llvm/lib/Target/LoongArch/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ add_public_tablegen_target(LoongArchCommonTableGen)
 
 add_llvm_target(LoongArchCodeGen
   LoongArchAsmPrinter.cpp
+  LoongArchExpandAtomicPseudoInsts.cpp
   LoongArchFrameLowering.cpp
   LoongArchInstrInfo.cpp
   LoongArchISelDAGToDAG.cpp

llvm/lib/Target/LoongArch/LoongArch.h

Lines changed: 3 additions & 0 deletions
@@ -25,6 +25,7 @@ class MCInst;
 class MCOperand;
 class MachineInstr;
 class MachineOperand;
+class PassRegistry;
 
 bool lowerLoongArchMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                         AsmPrinter &AP);
@@ -33,6 +34,8 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
                                              const AsmPrinter &AP);
 
 FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM);
+FunctionPass *createLoongArchExpandAtomicPseudoPass();
+void initializeLoongArchExpandAtomicPseudoPass(PassRegistry &);
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H
llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp

Lines changed: 207 additions & 0 deletions

@@ -0,0 +1,207 @@
+//==- LoongArchExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands atomic pseudo instructions into
+// target instructions. This pass should be run at the last possible moment,
+// avoiding the possibility for other passes to break the requirements for
+// forward progress in the LL/SC block.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoongArch.h"
+#include "LoongArchInstrInfo.h"
+#include "LoongArchTargetMachine.h"
+
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define LoongArch_EXPAND_ATOMIC_PSEUDO_NAME                                    \
+  "LoongArch atomic pseudo instruction expansion pass"
+
+namespace {
+
+class LoongArchExpandAtomicPseudo : public MachineFunctionPass {
+public:
+  const LoongArchInstrInfo *TII;
+  static char ID;
+
+  LoongArchExpandAtomicPseudo() : MachineFunctionPass(ID) {
+    initializeLoongArchExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return LoongArch_EXPAND_ATOMIC_PSEUDO_NAME;
+  }
+
+private:
+  bool expandMBB(MachineBasicBlock &MBB);
+  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                MachineBasicBlock::iterator &NextMBBI);
+  bool expandAtomicBinOp(MachineBasicBlock &MBB,
+                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
+                         bool IsMasked, int Width,
+                         MachineBasicBlock::iterator &NextMBBI);
+};
+
+char LoongArchExpandAtomicPseudo::ID = 0;
+
+bool LoongArchExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
+  TII =
+      static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  bool Modified = false;
+  for (auto &MBB : MF)
+    Modified |= expandMBB(MBB);
+  return Modified;
+}
+
+bool LoongArchExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  while (MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+    Modified |= expandMI(MBB, MBBI, NMBBI);
+    MBBI = NMBBI;
+  }
+
+  return Modified;
+}
+
+bool LoongArchExpandAtomicPseudo::expandMI(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  switch (MBBI->getOpcode()) {
+  case LoongArch::PseudoMaskedAtomicSwap32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
+                             NextMBBI);
+  }
+  return false;
+}
+
+static void insertMaskedMerge(const LoongArchInstrInfo *TII, DebugLoc DL,
+                              MachineBasicBlock *MBB, Register DestReg,
+                              Register OldValReg, Register NewValReg,
+                              Register MaskReg, Register ScratchReg) {
+  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
+  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
+  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
+
+  // res = oldval ^ ((oldval ^ newval) & masktargetdata);
+  BuildMI(MBB, DL, TII->get(LoongArch::XOR), ScratchReg)
+      .addReg(OldValReg)
+      .addReg(NewValReg);
+  BuildMI(MBB, DL, TII->get(LoongArch::AND), ScratchReg)
+      .addReg(ScratchReg)
+      .addReg(MaskReg);
+  BuildMI(MBB, DL, TII->get(LoongArch::XOR), DestReg)
+      .addReg(OldValReg)
+      .addReg(ScratchReg);
+}
+
+static void doMaskedAtomicBinOpExpansion(
+    const LoongArchInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
+    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
+  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
+  Register DestReg = MI.getOperand(0).getReg();
+  Register ScratchReg = MI.getOperand(1).getReg();
+  Register AddrReg = MI.getOperand(2).getReg();
+  Register IncrReg = MI.getOperand(3).getReg();
+  Register MaskReg = MI.getOperand(4).getReg();
+
+  // .loop:
+  //   dbar 0
+  //   ll.w destreg, (alignedaddr)
+  //   binop scratch, destreg, incr
+  //   xor scratch, destreg, scratch
+  //   and scratch, scratch, masktargetdata
+  //   xor scratch, destreg, scratch
+  //   sc.w scratch, scratch, (alignedaddr)
+  //   beq scratch, zero, loop
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
+      .addReg(AddrReg)
+      .addImm(0);
+  switch (BinOp) {
+  default:
+    llvm_unreachable("Unexpected AtomicRMW BinOp");
+  case AtomicRMWInst::Xchg:
+    BuildMI(LoopMBB, DL, TII->get(LoongArch::ADDI_W), ScratchReg)
+        .addReg(IncrReg)
+        .addImm(0);
+    break;
+    // TODO: support other AtomicRMWInst.
+  }
+
+  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
+                    ScratchReg);
+
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
+      .addReg(ScratchReg)
+      .addReg(AddrReg)
+      .addImm(0);
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQ))
+      .addReg(ScratchReg)
+      .addReg(LoongArch::R0)
+      .addMBB(LoopMBB);
+}
+
+bool LoongArchExpandAtomicPseudo::expandAtomicBinOp(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+    MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  MachineFunction *MF = MBB.getParent();
+  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  // Insert new MBBs.
+  MF->insert(++MBB.getIterator(), LoopMBB);
+  MF->insert(++LoopMBB->getIterator(), DoneMBB);
+
+  // Set up successors and transfer remaining instructions to DoneMBB.
+  LoopMBB->addSuccessor(LoopMBB);
+  LoopMBB->addSuccessor(DoneMBB);
+  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+  DoneMBB->transferSuccessors(&MBB);
+  MBB.addSuccessor(LoopMBB);
+
+  if (IsMasked)
+    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
+                                 Width);
+  // TODO: support IsMasked = false.
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *LoopMBB);
+  computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+  return true;
+}
+
+} // end namespace
+
+INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo",
+                LoongArch_EXPAND_ATOMIC_PSEUDO_NAME, false, false)
+
+namespace llvm {
+
+FunctionPass *createLoongArchExpandAtomicPseudoPass() {
+  return new LoongArchExpandAtomicPseudo();
+}
+
+} // end namespace llvm
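The XOR/AND/XOR sequence emitted by insertMaskedMerge above realizes the masked-merge identity noted in its comment. A stand-alone sketch of the same computation on plain integers (illustration only, not part of the patch):

#include <cassert>
#include <cstdint>

// res = oldval ^ ((oldval ^ newval) & mask) keeps the bits of oldval outside
// the mask and takes the bits of newval inside it.
uint32_t maskedMerge(uint32_t OldVal, uint32_t NewVal, uint32_t Mask) {
  return OldVal ^ ((OldVal ^ NewVal) & Mask);
}

int main() {
  // Replace only the low half-word; the upper half-word is preserved.
  assert(maskedMerge(0xAABBCCDDu, 0x00001234u, 0x0000FFFFu) == 0xAABB1234u);
  return 0;
}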

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 56 additions & 0 deletions
@@ -21,6 +21,8 @@
 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsLoongArch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/KnownBits.h"
 
@@ -137,6 +139,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
 
   setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
 
+  setMinCmpXchgSizeInBits(32);
+
   // Function alignments.
   const Align FunctionAlignment(4);
   setMinFunctionAlignment(FunctionAlignment);
@@ -1779,3 +1783,55 @@ bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
   // TODO: Support vectors.
   return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
 }
+
+TargetLowering::AtomicExpansionKind
+LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+  // TODO: Add more AtomicRMWInst that needs to be extended.
+  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+  if (Size == 8 || Size == 16)
+    return AtomicExpansionKind::MaskedIntrinsic;
+  return AtomicExpansionKind::None;
+}
+
+static Intrinsic::ID
+getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
+                                    AtomicRMWInst::BinOp BinOp) {
+  if (GRLen == 64) {
+    switch (BinOp) {
+    default:
+      llvm_unreachable("Unexpected AtomicRMW BinOp");
+    case AtomicRMWInst::Xchg:
+      return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
+      // TODO: support other AtomicRMWInst.
+    }
+  }
+
+  llvm_unreachable("Unexpected GRLen\n");
+}
+
+Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
+    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
+    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
+  unsigned GRLen = Subtarget.getGRLen();
+  Value *Ordering =
+      Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
+  Type *Tys[] = {AlignedAddr->getType()};
+  Function *LlwOpScwLoop = Intrinsic::getDeclaration(
+      AI->getModule(),
+      getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
+
+  if (GRLen == 64) {
+    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
+    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
+  }
+
+  Value *Result;
+
+  Result =
+      Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
+
+  if (GRLen == 64)
+    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+  return Result;
+}
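The AlignedAddr, Incr, Mask and ShiftAmt arguments received by emitMaskedAtomicRMWIntrinsic are supplied by the generic AtomicExpandPass once the minimum cmpxchg size is set to 32 bits. The sketch below shows roughly how those operands relate for a sub-word access; it assumes LoongArch's little-endian layout and a 4-byte container word, and computeMaskedOperands is a hypothetical helper for illustration, not an LLVM API.

#include <cstdint>

// Illustration only: relationship between the masked operands for an
// N-byte atomic (N = 1 or 2) located at address Addr.
struct MaskedOperands {
  uint64_t AlignedAddr; // Addr rounded down to the containing 32-bit word
  uint32_t ShiftAmt;    // bit offset of the value within that word
  uint32_t Mask;        // selects the value's bits inside the word
};

MaskedOperands computeMaskedOperands(uint64_t Addr, unsigned SizeInBytes) {
  MaskedOperands Ops;
  Ops.AlignedAddr = Addr & ~uint64_t(3);             // 4-byte aligned container
  Ops.ShiftAmt = unsigned(Addr & 3) * 8;             // little-endian byte offset
  Ops.Mask = ((1u << (SizeInBytes * 8)) - 1) << Ops.ShiftAmt; // sub-word sizes only
  return Ops;
}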

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 7 additions & 0 deletions
@@ -99,6 +99,13 @@ class LoongArchTargetLowering : public TargetLowering {
   bool isCheapToSpeculateCttz(Type *Ty) const override;
   bool isCheapToSpeculateCtlz(Type *Ty) const override;
   bool hasAndNot(SDValue Y) const override;
+  TargetLowering::AtomicExpansionKind
+  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+
+  Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
+                                      Value *AlignedAddr, Value *Incr,
+                                      Value *Mask, Value *ShiftAmt,
+                                      AtomicOrdering Ord) const override;
 
 private:
   /// Target-specific function used to lower LoongArch calling conventions.
