Skip to content

Commit ae7530c

Browse files
andreisfrantmak
authored and committed
[Xtensa] Lower atomic_cmp_swap_(8/16/32) operations.
1 parent 56f7d95 commit ae7530c

File tree

3 files changed

+188
-0
lines changed

3 files changed

+188
-0
lines changed

llvm/lib/Target/Xtensa/XtensaISelLowering.cpp

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,16 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &tm,
308308
// them
309309
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
310310

311+
if (!Subtarget.hasS32C1I()) {
312+
for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
313+
I <= MVT::LAST_INTEGER_VALUETYPE; ++I) {
314+
MVT VT = MVT::SimpleValueType(I);
315+
if (isTypeLegal(VT)) {
316+
setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
317+
}
318+
}
319+
}
320+
311321
// Compute derived properties from the register classes
312322
computeRegisterProperties(STI.getRegisterInfo());
313323

@@ -1949,6 +1959,143 @@ XtensaTargetLowering::emitSelectCC(MachineInstr &MI,
19491959
return BB;
19501960
}
19511961

1962+
// Emit instructions for atomic_cmp_swap node for 8/16 bit operands
1963+
MachineBasicBlock *
1964+
XtensaTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB,
1965+
int isByteOperand) const {
1966+
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
1967+
DebugLoc DL = MI.getDebugLoc();
1968+
1969+
const BasicBlock *LLVM_BB = BB->getBasicBlock();
1970+
MachineFunction::iterator It = ++BB->getIterator();
1971+
1972+
MachineBasicBlock *thisBB = BB;
1973+
MachineFunction *F = BB->getParent();
1974+
MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock(LLVM_BB);
1975+
MachineBasicBlock *BBExit = F->CreateMachineBasicBlock(LLVM_BB);
1976+
1977+
F->insert(It, BBLoop);
1978+
F->insert(It, BBExit);
1979+
1980+
// Transfer the remainder of BB and its successor edges to BBExit.
1981+
BBExit->splice(BBExit->begin(), BB,
1982+
std::next(MachineBasicBlock::iterator(MI)), BB->end());
1983+
BBExit->transferSuccessorsAndUpdatePHIs(BB);
1984+
1985+
BB->addSuccessor(BBLoop);
1986+
1987+
MachineOperand &Res = MI.getOperand(0);
1988+
MachineOperand &AtomValAddr = MI.getOperand(1);
1989+
MachineOperand &CmpVal = MI.getOperand(2);
1990+
MachineOperand &SwpVal = MI.getOperand(3);
1991+
1992+
MachineFunction *MF = BB->getParent();
1993+
MachineRegisterInfo &MRI = MF->getRegInfo();
1994+
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
1995+
1996+
unsigned R1 = MRI.createVirtualRegister(RC);
1997+
BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R1).addImm(3);
1998+
1999+
unsigned ByteOffs = MRI.createVirtualRegister(RC);
2000+
BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), ByteOffs)
2001+
.addReg(R1)
2002+
.addReg(AtomValAddr.getReg());
2003+
2004+
unsigned AddrAlign = MRI.createVirtualRegister(RC);
2005+
BuildMI(*BB, MI, DL, TII.get(Xtensa::SUB), AddrAlign)
2006+
.addReg(AtomValAddr.getReg())
2007+
.addReg(ByteOffs);
2008+
2009+
unsigned BitOffs = MRI.createVirtualRegister(RC);
2010+
BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), BitOffs)
2011+
.addReg(ByteOffs)
2012+
.addImm(3);
2013+
2014+
unsigned Mask1 = MRI.createVirtualRegister(RC);
2015+
if (isByteOperand) {
2016+
BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), Mask1).addImm(0xff);
2017+
} else {
2018+
unsigned R2 = MRI.createVirtualRegister(RC);
2019+
BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(1);
2020+
unsigned R3 = MRI.createVirtualRegister(RC);
2021+
BuildMI(*BB, MI, DL, TII.get(Xtensa::SLLI), R3).addReg(R2).addImm(16);
2022+
BuildMI(*BB, MI, DL, TII.get(Xtensa::ADDI), Mask1).addReg(R3).addImm(-1);
2023+
}
2024+
2025+
BuildMI(*BB, MI, DL, TII.get(Xtensa::SSL)).addReg(BitOffs);
2026+
2027+
unsigned R2 = MRI.createVirtualRegister(RC);
2028+
BuildMI(*BB, MI, DL, TII.get(Xtensa::MOVI), R2).addImm(-1);
2029+
2030+
unsigned Mask2 = MRI.createVirtualRegister(RC);
2031+
BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Mask2).addReg(Mask1);
2032+
2033+
unsigned Mask3 = MRI.createVirtualRegister(RC);
2034+
BuildMI(*BB, MI, DL, TII.get(Xtensa::XOR), Mask3).addReg(Mask2).addReg(R2);
2035+
2036+
unsigned R3 = MRI.createVirtualRegister(RC);
2037+
BuildMI(*BB, MI, DL, TII.get(Xtensa::L32I), R3).addReg(AddrAlign).addImm(0);
2038+
2039+
unsigned R4 = MRI.createVirtualRegister(RC);
2040+
BuildMI(*BB, MI, DL, TII.get(Xtensa::AND), R4).addReg(R3).addReg(Mask3);
2041+
2042+
unsigned Cmp1 = MRI.createVirtualRegister(RC);
2043+
BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Cmp1).addReg(CmpVal.getReg());
2044+
2045+
unsigned Swp1 = MRI.createVirtualRegister(RC);
2046+
BuildMI(*BB, MI, DL, TII.get(Xtensa::SLL), Swp1).addReg(SwpVal.getReg());
2047+
2048+
BB = BBLoop;
2049+
2050+
unsigned MaskPhi = MRI.createVirtualRegister(RC);
2051+
unsigned MaskLoop = MRI.createVirtualRegister(RC);
2052+
2053+
BuildMI(*BB, BB->begin(), DL, TII.get(Xtensa::PHI), MaskPhi)
2054+
.addReg(MaskLoop)
2055+
.addMBB(BBLoop)
2056+
.addReg(R4)
2057+
.addMBB(thisBB);
2058+
2059+
unsigned Cmp2 = MRI.createVirtualRegister(RC);
2060+
BuildMI(BB, DL, TII.get(Xtensa::OR), Cmp2).addReg(Cmp1).addReg(MaskPhi);
2061+
2062+
unsigned Swp2 = MRI.createVirtualRegister(RC);
2063+
BuildMI(BB, DL, TII.get(Xtensa::OR), Swp2).addReg(Swp1).addReg(MaskPhi);
2064+
2065+
BuildMI(BB, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1).addReg(Cmp2);
2066+
2067+
unsigned Swp3 = MRI.createVirtualRegister(RC);
2068+
BuildMI(BB, DL, TII.get(Xtensa::S32C1I), Swp3)
2069+
.addReg(Swp2)
2070+
.addReg(AddrAlign)
2071+
.addImm(0);
2072+
2073+
BuildMI(BB, DL, TII.get(Xtensa::AND), MaskLoop).addReg(Swp3).addReg(Mask3);
2074+
2075+
BuildMI(BB, DL, TII.get(Xtensa::BNE))
2076+
.addReg(MaskLoop)
2077+
.addReg(MaskPhi)
2078+
.addMBB(BBLoop);
2079+
2080+
BB->addSuccessor(BBLoop);
2081+
BB->addSuccessor(BBExit);
2082+
2083+
BB = BBExit;
2084+
auto St = BBExit->begin();
2085+
2086+
unsigned R5 = MRI.createVirtualRegister(RC);
2087+
BuildMI(*BB, St, DL, TII.get(Xtensa::SSR)).addReg(BitOffs);
2088+
2089+
BuildMI(*BB, St, DL, TII.get(Xtensa::SRL), R5).addReg(Swp3);
2090+
2091+
BuildMI(*BB, St, DL, TII.get(Xtensa::AND), Res.getReg())
2092+
.addReg(R5)
2093+
.addReg(Mask1);
2094+
2095+
MI.eraseFromParent(); // The pseudo instruction is gone now.
2096+
return BB;
2097+
}
2098+
19522099
MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter(
19532100
MachineInstr &MI, MachineBasicBlock *MBB) const {
19542101
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
@@ -2025,6 +2172,32 @@ MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter(
20252172
return MBB;
20262173
}
20272174

2175+
case Xtensa::ATOMIC_CMP_SWAP_8_P: {
2176+
return emitAtomicCmpSwap(MI, MBB, 1);
2177+
}
2178+
2179+
case Xtensa::ATOMIC_CMP_SWAP_16_P: {
2180+
return emitAtomicCmpSwap(MI, MBB, 0);
2181+
}
2182+
2183+
case Xtensa::ATOMIC_CMP_SWAP_32_P: {
2184+
MachineOperand &R = MI.getOperand(0);
2185+
MachineOperand &Addr = MI.getOperand(1);
2186+
MachineOperand &Cmp = MI.getOperand(2);
2187+
MachineOperand &Swap = MI.getOperand(3);
2188+
2189+
BuildMI(*MBB, MI, DL, TII.get(Xtensa::WSR), Xtensa::SCOMPARE1)
2190+
.addReg(Cmp.getReg());
2191+
2192+
BuildMI(*MBB, MI, DL, TII.get(Xtensa::S32C1I), R.getReg())
2193+
.addReg(Swap.getReg())
2194+
.addReg(Addr.getReg())
2195+
.addImm(0);
2196+
2197+
MI.eraseFromParent();
2198+
return MBB;
2199+
}
2200+
20282201
case Xtensa::S8I:
20292202
case Xtensa::S16I:
20302203
case Xtensa::S32I:

llvm/lib/Target/Xtensa/XtensaISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,8 @@ class XtensaTargetLowering : public TargetLowering {
205205
// Implement EmitInstrWithCustomInserter for individual operation types.
206206
MachineBasicBlock *emitSelectCC(MachineInstr &MI,
207207
MachineBasicBlock *BB) const;
208+
// Custom inserter for the 8- and 16-bit atomic compare-and-swap pseudo
// instructions. MI is the pseudo being expanded and BB the block that
// contains it; a non-zero isByteOperand selects the 8-bit form, zero the
// 16-bit form. Returns the block in which insertion continues.
MachineBasicBlock *emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB,
209+
int isByteOperand) const;
208210

209211
unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
210212
if (ConstraintCode == "R")

llvm/lib/Target/Xtensa/XtensaInstrInfo.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1498,6 +1498,19 @@ def : Pat<(atomic_store_8 addr_ish1:$addr, AR:$t), (S8I AR:$t, addr_ish1:$addr)
14981498
def : Pat<(atomic_store_16 addr_ish2:$addr, AR:$t), (S16I AR:$t, addr_ish2:$addr)>;
14991499
def : Pat<(atomic_store_32 addr_ish4:$addr, AR:$t), (S32I AR:$t, addr_ish4:$addr)>;
15001500

1501+
// Pseudo instructions selected for the atomic_cmp_swap_8/16/32 nodes.
// Each takes the pointer, the compare value and the swap value in AR
// registers and defines the result in an AR register. They are only
// available under the HasS32C1I predicate and are marked
// usesCustomInserter, so they are expanded by the target's custom inserter
// rather than emitted directly.
let usesCustomInserter = 1, Predicates = [HasS32C1I] in
1502+
{
1503+
def ATOMIC_CMP_SWAP_8_P: Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$cmp, AR:$swap),
1504+
"!atomic_cmp_swap_8_p, $dst, $ptr, $cmp, $swap",
1505+
[(set AR:$dst, (atomic_cmp_swap_8 AR:$ptr, AR:$cmp, AR:$swap))]>;
1506+
def ATOMIC_CMP_SWAP_16_P: Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$cmp, AR:$swap),
1507+
"!atomic_cmp_swap_16_p, $dst, $ptr, $cmp, $swap",
1508+
[(set AR:$dst, (atomic_cmp_swap_16 AR:$ptr, AR:$cmp, AR:$swap))]>;
1509+
def ATOMIC_CMP_SWAP_32_P: Pseudo<(outs AR:$dst), (ins AR:$ptr, AR:$cmp, AR:$swap),
1510+
"!atomic_cmp_swap_32_p, $dst, $ptr, $cmp, $swap",
1511+
[(set AR:$dst, (atomic_cmp_swap_32 AR:$ptr, AR:$cmp, AR:$swap))]>;
1512+
}
1513+
15011514
//===----------------------------------------------------------------------===//
15021515
// DSP Instructions
15031516
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)