
Commit 066d03f

andreisfrantmak authored and committed
[Xtensa] Lower atomic_swap_(8/16/32) operations.
1 parent ae7530c commit 066d03f

3 files changed: 289 additions & 0 deletions
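For orientation, and not part of the commit itself: a small C++ example (all names invented for illustration) of the source-level atomic exchanges this lowering serves. Each std::atomic exchange of an 8-, 16-, or 32-bit value should reach the backend as an atomic swap of the matching width and be selected through the new ATOMIC_SWAP_8/16/32_P pseudos added below.

// Illustrative only, not from the commit: 8-, 16- and 32-bit atomic exchanges.
#include <atomic>
#include <cstdint>

std::atomic<uint8_t> Flag{0};
std::atomic<uint16_t> Seq{0};
std::atomic<uint32_t> Word{0};

uint32_t ExchangeAll(uint8_t B, uint16_t H, uint32_t W) {
  uint8_t OldB = Flag.exchange(B);  // 8-bit swap
  uint16_t OldH = Seq.exchange(H);  // 16-bit swap
  uint32_t OldW = Word.exchange(W); // 32-bit swap
  return OldB + OldH + OldW;
}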

llvm/lib/Target/Xtensa/XtensaISelLowering.cpp

Lines changed: 275 additions & 0 deletions
@@ -314,6 +314,7 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &tm,
       MVT VT = MVT::SimpleValueType(I);
       if (isTypeLegal(VT)) {
         setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
+        setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);
       }
     }
   }
@@ -2096,6 +2097,268 @@ XtensaTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB,
   return BB;
 }
 
+// Emit instructions for atomic_swap node for 8/16 bit operands
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicSwap(MachineInstr &MI, MachineBasicBlock *BB,
+                                     int isByteOperand) const {
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = ++BB->getIterator();
+
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *BBLoop1 = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *BBLoop2 = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *BBLoop3 = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *BBLoop4 = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+  F->insert(It, BBLoop1);
+  F->insert(It, BBLoop2);
+  F->insert(It, BBLoop3);
+  F->insert(It, BBLoop4);
+  F->insert(It, BBExit);
+
+  // Transfer the remainder of BB and its successor edges to BBExit.
+  BBExit->splice(BBExit->begin(), BB,
+                 std::next(MachineBasicBlock::iterator(MI)), BB->end());
+  BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+  BB->addSuccessor(BBLoop1);
+  BBLoop1->addSuccessor(BBLoop2);
+  BBLoop2->addSuccessor(BBLoop3);
+  BBLoop2->addSuccessor(BBLoop4);
+  BBLoop3->addSuccessor(BBLoop2);
+  BBLoop3->addSuccessor(BBLoop4);
+  BBLoop4->addSuccessor(BBLoop1);
+  BBLoop4->addSuccessor(BBExit);
+
+  MachineOperand &Res = MI.getOperand(0);
+  MachineOperand &AtomValAddr = MI.getOperand(1);
+  MachineOperand &SwpVal = MI.getOperand(2);
+
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+  unsigned R1 = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R1).addImm(3);
+
+  unsigned ByteOffs = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), ByteOffs)
+      .addReg(R1)
+      .addReg(AtomValAddr.getReg());
+
+  unsigned AddrAlign = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::SUB), AddrAlign)
+      .addReg(AtomValAddr.getReg())
+      .addReg(ByteOffs);
+
+  unsigned BitOffs = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), BitOffs)
+      .addReg(ByteOffs)
+      .addImm(3);
+
+  unsigned Mask1 = MRI.createVirtualRegister(RC);
+  if (isByteOperand) {
+    BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), Mask1).addImm(0xff);
+  } else {
+    unsigned R2 = MRI.createVirtualRegister(RC);
+    BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(1);
+    unsigned R3 = MRI.createVirtualRegister(RC);
+    BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), R3).addReg(R2).addImm(16);
+    BuildMI(*BB, MI, DL, TII.get(Xtensa::ADDI), Mask1).addReg(R3).addImm(-1);
+  }
+
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::SSL)).addReg(BitOffs);
+
+  unsigned R2 = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(-1);
+
+  unsigned Mask2 = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Mask2).addReg(Mask1);
+
+  unsigned Mask3 = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::XOR), Mask3).addReg(Mask2).addReg(R2);
+
+  unsigned R3 = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R3).addReg(AddrAlign).addImm(0);
+
+  unsigned R4 = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), R4).addReg(R3).addReg(Mask3);
+
+  unsigned SwpValShifted = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), SwpValShifted)
+      .addReg(SwpVal.getReg());
+
+  unsigned R5 = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R5).addReg(AddrAlign).addImm(0);
+
+  unsigned AtomVal = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), AtomVal).addReg(R5).addReg(Mask2);
+
+  unsigned AtomValPhi = MRI.createVirtualRegister(RC);
+  unsigned AtomValLoop = MRI.createVirtualRegister(RC);
+
+  BuildMI(*BBLoop1, BBLoop1->begin(), DL, TII.get(Xtensa::PHI), AtomValPhi)
+      .addReg(AtomValLoop)
+      .addMBB(BBLoop4)
+      .addReg(AtomVal)
+      .addMBB(BB);
+
+  BB = BBLoop1;
+
+  BuildMI(BB, DL, TII.get(Xtensa::MEMW));
+
+  unsigned R6 = MRI.createVirtualRegister(RC);
+  BuildMI(BB, DL, TII.get(Xtensa::L32I), R6).addReg(AddrAlign).addImm(0);
+
+  unsigned R7 = MRI.createVirtualRegister(RC);
+  BuildMI(BB, DL, TII.get(Xtensa::AND), R7).addReg(R6).addReg(Mask3);
+
+  unsigned MaskPhi = MRI.createVirtualRegister(RC);
+  unsigned MaskLoop = MRI.createVirtualRegister(RC);
+
+  BuildMI(*BBLoop2, BBLoop2->begin(), DL, TII.get(Xtensa::PHI), MaskPhi)
+      .addReg(MaskLoop)
+      .addMBB(BBLoop3)
+      .addReg(R7)
+      .addMBB(BBLoop1);
+
+  BB = BBLoop2;
+
+  unsigned Swp1 = MRI.createVirtualRegister(RC);
+  BuildMI(BB, DL, TII.get(Xtensa::OR), Swp1)
+      .addReg(SwpValShifted)
+      .addReg(MaskPhi);
+
+  unsigned AtomVal1 = MRI.createVirtualRegister(RC);
+  BuildMI(BB, DL, TII.get(Xtensa::OR), AtomVal1)
+      .addReg(AtomValPhi)
+      .addReg(MaskPhi);
+
+  BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(AtomVal1);
+
+  unsigned Swp2 = MRI.createVirtualRegister(RC);
+  BuildMI(BB, DL, TII.get(Xtensa::S32C1I), Swp2)
+      .addReg(Swp1)
+      .addReg(AddrAlign)
+      .addImm(0);
+
+  BuildMI(BB, DL, TII.get(Xtensa::BEQ))
+      .addReg(AtomVal1)
+      .addReg(Swp2)
+      .addMBB(BBLoop4);
+
+  BB = BBLoop3;
+
+  BuildMI(BB, DL, TII.get(Xtensa::AND), MaskLoop).addReg(Swp2).addReg(Mask3);
+
+  BuildMI(BB, DL, TII.get(Xtensa::BNE))
+      .addReg(MaskLoop)
+      .addReg(MaskPhi)
+      .addMBB(BBLoop2);
+
+  BB = BBLoop4;
+
+  BuildMI(BB, DL, TII.get(Xtensa::AND), AtomValLoop).addReg(Swp2).addReg(Mask2);
+
+  BuildMI(BB, DL, TII.get(Xtensa::BNE))
+      .addReg(AtomValLoop)
+      .addReg(AtomValPhi)
+      .addMBB(BBLoop1);
+
+  BB = BBExit;
+
+  auto St = BB->begin();
+
+  unsigned R8 = MRI.createVirtualRegister(RC);
+
+  BuildMI(*BB, St, DL, TII.get(Xtensa::SSR)).addReg(BitOffs);
+  BuildMI(*BB, St, DL, TII.get(Xtensa::SLL), R8).addReg(AtomValLoop);
+
+  if (isByteOperand) {
+    BuildMI(*BB, St, DL, TII.get(Xtensa::SEXT), Res.getReg())
+        .addReg(R8)
+        .addImm(7);
+  } else {
+    BuildMI(*BB, St, DL, TII.get(Xtensa::SEXT), Res.getReg())
+        .addReg(R8)
+        .addImm(15);
+  }
+
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  return BB;
+}
+
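A rough C-level sketch of the strategy the 8/16-bit emitter above follows, written as my own illustration rather than code from the commit (helper and variable names are invented, and GCC/Clang __atomic builtins stand in for the S32C1I loop): align the address down to its containing 32-bit word, build a mask for the byte or halfword lane, and retry a word-wide compare-and-swap until the lane has been exchanged. The emitted machine code additionally sign-extends the extracted old value, which is what the final SEXT with immediate 7 or 15 does.

// Sketch only; SubwordSwap, LaneMask and the other names are invented here.
#include <cstdint>

uint32_t SubwordSwap(uint32_t *WordAddr, unsigned BitOffs, uint32_t LaneMask,
                     uint32_t NewVal) {
  uint32_t ShiftedMask = LaneMask << BitOffs; // bits that belong to our lane
  uint32_t KeepMask = ~ShiftedMask;           // bits of the neighbouring lanes
  uint32_t ShiftedNew = (NewVal << BitOffs) & ShiftedMask;
  uint32_t Old = __atomic_load_n(WordAddr, __ATOMIC_SEQ_CST);
  for (;;) {
    uint32_t Desired = (Old & KeepMask) | ShiftedNew;
    // A word-wide CAS plays the role of S32C1I; on failure Old is refreshed
    // with the current memory contents and the loop retries.
    if (__atomic_compare_exchange_n(WordAddr, &Old, Desired, /*weak=*/false,
                                    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
      return (Old & ShiftedMask) >> BitOffs; // previous value of the lane
  }
}

In the emitted code the lane shift corresponds to (addr & 3) * 8, computed by the MOVI 3 / AND / SLLI 3 sequence, and the lane mask is 0xff for the byte case or (1 << 16) - 1 for the halfword case.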
+// Emit instructions for atomic_swap node for 32 bit operands
+MachineBasicBlock *
+XtensaTargetLowering::emitAtomicSwap(MachineInstr &MI,
+                                     MachineBasicBlock *BB) const {
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  DebugLoc DL = MI.getDebugLoc();
+
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = ++BB->getIterator();
+
+  MachineFunction *F = BB->getParent();
+  MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
+
+  F->insert(It, BBLoop);
+  F->insert(It, BBExit);
+
+  // Transfer the remainder of BB and its successor edges to BBExit.
+  BBExit->splice(BBExit->begin(), BB,
+                 std::next(MachineBasicBlock::iterator(MI)), BB->end());
+  BBExit->transferSuccessorsAndUpdatePHIs(BB);
+
+  BB->addSuccessor(BBLoop);
+  BBLoop->addSuccessor(BBLoop);
+  BBLoop->addSuccessor(BBExit);
+
+  MachineOperand &Res = MI.getOperand(0);
+  MachineOperand &AtomValAddr = MI.getOperand(1);
+  MachineOperand &SwpVal = MI.getOperand(2);
+
+  MachineFunction *MF = BB->getParent();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::MEMW));
+
+  unsigned AtomVal = MRI.createVirtualRegister(RC);
+  BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), AtomVal)
+      .addReg(AtomValAddr.getReg())
+      .addImm(0);
+
+  unsigned AtomValLoop = MRI.createVirtualRegister(RC);
+
+  BuildMI(*BBLoop, BBLoop->begin(), DL, TII.get(Xtensa::PHI), Res.getReg())
+      .addReg(AtomValLoop)
+      .addMBB(BBLoop)
+      .addReg(AtomVal)
+      .addMBB(BB);
+
+  BB = BBLoop;
+
+  BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(Res.getReg());
+
+  BuildMI(BB, DL, TII.get(Xtensa::S32C1I), AtomValLoop)
+      .addReg(SwpVal.getReg())
+      .addReg(AtomValAddr.getReg())
+      .addImm(0);
+
+  BuildMI(BB, DL, TII.get(Xtensa::BNE))
+      .addReg(AtomValLoop)
+      .addReg(Res.getReg())
+      .addMBB(BBLoop);
+
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  return BB;
+}
+
 MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *MBB) const {
   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
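The 32-bit emitAtomicSwap above needs no masking: it is a MEMW plus load followed by an S32C1I retry loop. A C-level analogue, again purely illustrative, with an invented name and a generic compare-and-swap standing in for S32C1I:

// Sketch only; WordSwap is an invented name.
#include <cstdint>

uint32_t WordSwap(uint32_t *Addr, uint32_t NewVal) {
  uint32_t Expected = __atomic_load_n(Addr, __ATOMIC_SEQ_CST);
  // Retry until the compare-and-swap observes the value we last read,
  // i.e. the word did not change between the read and the swap.
  while (!__atomic_compare_exchange_n(Addr, &Expected, NewVal, /*weak=*/false,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
  }
  return Expected; // the old value that was replaced
}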
@@ -2198,6 +2461,18 @@ MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter(
     return MBB;
   }
 
+  case Xtensa::ATOMIC_SWAP_8_P: {
+    return emitAtomicSwap(MI, MBB, 1);
+  }
+
+  case Xtensa::ATOMIC_SWAP_16_P: {
+    return emitAtomicSwap(MI, MBB, 0);
+  }
+
+  case Xtensa::ATOMIC_SWAP_32_P: {
+    return emitAtomicSwap(MI, MBB);
+  }
+
   case Xtensa::S8I:
   case Xtensa::S16I:
   case Xtensa::S32I:

llvm/lib/Target/Xtensa/XtensaISelLowering.h

Lines changed: 4 additions & 0 deletions
@@ -205,8 +205,12 @@ class XtensaTargetLowering : public TargetLowering {
   // Implement EmitInstrWithCustomInserter for individual operation types.
   MachineBasicBlock *emitSelectCC(MachineInstr &MI,
                                   MachineBasicBlock *BB) const;
+  MachineBasicBlock *emitAtomicSwap(MachineInstr &MI, MachineBasicBlock *BB,
+                                    int isByteOperand) const;
   MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB,
                                        int isByteOperand) const;
+  MachineBasicBlock *emitAtomicSwap(MachineInstr &MI,
+                                    MachineBasicBlock *BB) const;
 
   unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
     if (ConstraintCode == "R")

llvm/lib/Target/Xtensa/XtensaInstrInfo.td

Lines changed: 10 additions & 0 deletions
@@ -1509,6 +1509,16 @@ let usesCustomInserter = 1, Predicates = [HasS32C1I] in
   def ATOMIC_CMP_SWAP_32_P: Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$cmp, AR:$swap),
                                    "!atomic_cmp_swap_32_p, $dst, $ptr, $cmp, $swap",
                                    [(set AR:$dst, (atomic_cmp_swap_32 AR:$ptr, AR:$cmp, AR:$swap))]>;
+
+  def ATOMIC_SWAP_8_P: Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$swap),
+                              "!atomic_swap_8_p, $dst, $ptr, $swap",
+                              [(set AR:$dst, (atomic_swap_8 AR:$ptr, AR:$swap))]>;
+  def ATOMIC_SWAP_16_P: Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$swap),
+                               "!atomic_swap_16_p, $dst, $ptr, $swap",
+                               [(set AR:$dst, (atomic_swap_16 AR:$ptr, AR:$swap))]>;
+  def ATOMIC_SWAP_32_P: Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$swap),
+                               "!atomic_swap_32_p, $dst, $ptr, $swap",
+                               [(set AR:$dst, (atomic_swap_32 AR:$ptr, AR:$swap))]>;
 }
 
 //===----------------------------------------------------------------------===//
