@@ -314,6 +314,7 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &tm,
314
314
MVT VT = MVT::SimpleValueType (I);
315
315
if (isTypeLegal (VT)) {
316
316
setOperationAction (ISD::ATOMIC_CMP_SWAP, VT, Expand);
317
+ setOperationAction (ISD::ATOMIC_SWAP, VT, Expand);
317
318
}
318
319
}
319
320
}
@@ -2096,6 +2097,268 @@ XtensaTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB,
2096
2097
return BB;
2097
2098
}
2098
2099
2100
+ // Emit instructions for atomic_swap node for 8/16 bit operands
2101
+ MachineBasicBlock *
2102
+ XtensaTargetLowering::emitAtomicSwap (MachineInstr &MI, MachineBasicBlock *BB,
2103
+ int isByteOperand) const {
2104
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo ();
2105
+ DebugLoc DL = MI.getDebugLoc ();
2106
+
2107
+ const BasicBlock *LLVM_BB = BB->getBasicBlock ();
2108
+ MachineFunction::iterator It = ++BB->getIterator ();
2109
+
2110
+ MachineFunction *F = BB->getParent ();
2111
+ MachineBasicBlock *BBLoop1 = F->CreateMachineBasicBlock (LLVM_BB);
2112
+ MachineBasicBlock *BBLoop2 = F->CreateMachineBasicBlock (LLVM_BB);
2113
+ MachineBasicBlock *BBLoop3 = F->CreateMachineBasicBlock (LLVM_BB);
2114
+ MachineBasicBlock *BBLoop4 = F->CreateMachineBasicBlock (LLVM_BB);
2115
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock (LLVM_BB);
2116
+
2117
+ F->insert (It, BBLoop1);
2118
+ F->insert (It, BBLoop2);
2119
+ F->insert (It, BBLoop3);
2120
+ F->insert (It, BBLoop4);
2121
+ F->insert (It, BBExit);
2122
+
2123
+ // Transfer the remainder of BB and its successor edges to BBExit.
2124
+ BBExit->splice (BBExit->begin (), BB,
2125
+ std::next (MachineBasicBlock::iterator (MI)), BB->end ());
2126
+ BBExit->transferSuccessorsAndUpdatePHIs (BB);
2127
+
2128
+ BB->addSuccessor (BBLoop1);
2129
+ BBLoop1->addSuccessor (BBLoop2);
2130
+ BBLoop2->addSuccessor (BBLoop3);
2131
+ BBLoop2->addSuccessor (BBLoop4);
2132
+ BBLoop3->addSuccessor (BBLoop2);
2133
+ BBLoop3->addSuccessor (BBLoop4);
2134
+ BBLoop4->addSuccessor (BBLoop1);
2135
+ BBLoop4->addSuccessor (BBExit);
2136
+
2137
+ MachineOperand &Res = MI.getOperand (0 );
2138
+ MachineOperand &AtomValAddr = MI.getOperand (1 );
2139
+ MachineOperand &SwpVal = MI.getOperand (2 );
2140
+
2141
+ MachineFunction *MF = BB->getParent ();
2142
+ MachineRegisterInfo &MRI = MF->getRegInfo ();
2143
+ const TargetRegisterClass *RC = getRegClassFor (MVT::i32 );
2144
+
2145
+ unsigned R1 = MRI.createVirtualRegister (RC);
2146
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::MOVI), R1).addImm (3 );
2147
+
2148
+ unsigned ByteOffs = MRI.createVirtualRegister (RC);
2149
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::AND), ByteOffs)
2150
+ .addReg (R1)
2151
+ .addReg (AtomValAddr.getReg ());
2152
+
2153
+ unsigned AddrAlign = MRI.createVirtualRegister (RC);
2154
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::SUB), AddrAlign)
2155
+ .addReg (AtomValAddr.getReg ())
2156
+ .addReg (ByteOffs);
2157
+
2158
+ unsigned BitOffs = MRI.createVirtualRegister (RC);
2159
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::SLLI), BitOffs)
2160
+ .addReg (ByteOffs)
2161
+ .addImm (3 );
2162
+
2163
+ unsigned Mask1 = MRI.createVirtualRegister (RC);
2164
+ if (isByteOperand) {
2165
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::MOVI), Mask1).addImm (0xff );
2166
+ } else {
2167
+ unsigned R2 = MRI.createVirtualRegister (RC);
2168
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::MOVI), R2).addImm (1 );
2169
+ unsigned R3 = MRI.createVirtualRegister (RC);
2170
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::SLLI), R3).addReg (R2).addImm (16 );
2171
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::ADDI), Mask1).addReg (R3).addImm (-1 );
2172
+ }
2173
+
2174
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::SSL)).addReg (BitOffs);
2175
+
2176
+ unsigned R2 = MRI.createVirtualRegister (RC);
2177
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::MOVI), R2).addImm (-1 );
2178
+
2179
+ unsigned Mask2 = MRI.createVirtualRegister (RC);
2180
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::SLL), Mask2).addReg (Mask1);
2181
+
2182
+ unsigned Mask3 = MRI.createVirtualRegister (RC);
2183
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::XOR), Mask3).addReg (Mask2).addReg (R2);
2184
+
2185
+ unsigned R3 = MRI.createVirtualRegister (RC);
2186
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::L32I), R3).addReg (AddrAlign).addImm (0 );
2187
+
2188
+ unsigned R4 = MRI.createVirtualRegister (RC);
2189
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::AND), R4).addReg (R3).addReg (Mask3);
2190
+
2191
+ unsigned SwpValShifted = MRI.createVirtualRegister (RC);
2192
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::SLL), SwpValShifted)
2193
+ .addReg (SwpVal.getReg ());
2194
+
2195
+ unsigned R5 = MRI.createVirtualRegister (RC);
2196
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::L32I), R5).addReg (AddrAlign).addImm (0 );
2197
+
2198
+ unsigned AtomVal = MRI.createVirtualRegister (RC);
2199
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::AND), AtomVal).addReg (R5).addReg (Mask2);
2200
+
2201
+ unsigned AtomValPhi = MRI.createVirtualRegister (RC);
2202
+ unsigned AtomValLoop = MRI.createVirtualRegister (RC);
2203
+
2204
+ BuildMI (*BBLoop1, BBLoop1->begin (), DL, TII.get (Xtensa::PHI), AtomValPhi)
2205
+ .addReg (AtomValLoop)
2206
+ .addMBB (BBLoop4)
2207
+ .addReg (AtomVal)
2208
+ .addMBB (BB);
2209
+
2210
+ BB = BBLoop1;
2211
+
2212
+ BuildMI (BB, DL, TII.get (Xtensa::MEMW));
2213
+
2214
+ unsigned R6 = MRI.createVirtualRegister (RC);
2215
+ BuildMI (BB, DL, TII.get (Xtensa::L32I), R6).addReg (AddrAlign).addImm (0 );
2216
+
2217
+ unsigned R7 = MRI.createVirtualRegister (RC);
2218
+ BuildMI (BB, DL, TII.get (Xtensa::AND), R7).addReg (R6).addReg (Mask3);
2219
+
2220
+ unsigned MaskPhi = MRI.createVirtualRegister (RC);
2221
+ unsigned MaskLoop = MRI.createVirtualRegister (RC);
2222
+
2223
+ BuildMI (*BBLoop2, BBLoop2->begin (), DL, TII.get (Xtensa::PHI), MaskPhi)
2224
+ .addReg (MaskLoop)
2225
+ .addMBB (BBLoop3)
2226
+ .addReg (R7)
2227
+ .addMBB (BBLoop1);
2228
+
2229
+ BB = BBLoop2;
2230
+
2231
+ unsigned Swp1 = MRI.createVirtualRegister (RC);
2232
+ BuildMI (BB, DL, TII.get (Xtensa::OR), Swp1)
2233
+ .addReg (SwpValShifted)
2234
+ .addReg (MaskPhi);
2235
+
2236
+ unsigned AtomVal1 = MRI.createVirtualRegister (RC);
2237
+ BuildMI (BB, DL, TII.get (Xtensa::OR), AtomVal1)
2238
+ .addReg (AtomValPhi)
2239
+ .addReg (MaskPhi);
2240
+
2241
+ BuildMI (BB, DL, TII.get (Xtensa::WSR), Xtensa::SCOMPARE1).addReg (AtomVal1);
2242
+
2243
+ unsigned Swp2 = MRI.createVirtualRegister (RC);
2244
+ BuildMI (BB, DL, TII.get (Xtensa::S32C1I), Swp2)
2245
+ .addReg (Swp1)
2246
+ .addReg (AddrAlign)
2247
+ .addImm (0 );
2248
+
2249
+ BuildMI (BB, DL, TII.get (Xtensa::BEQ))
2250
+ .addReg (AtomVal1)
2251
+ .addReg (Swp2)
2252
+ .addMBB (BBLoop4);
2253
+
2254
+ BB = BBLoop3;
2255
+
2256
+ BuildMI (BB, DL, TII.get (Xtensa::AND), MaskLoop).addReg (Swp2).addReg (Mask3);
2257
+
2258
+ BuildMI (BB, DL, TII.get (Xtensa::BNE))
2259
+ .addReg (MaskLoop)
2260
+ .addReg (MaskPhi)
2261
+ .addMBB (BBLoop2);
2262
+
2263
+ BB = BBLoop4;
2264
+
2265
+ BuildMI (BB, DL, TII.get (Xtensa::AND), AtomValLoop).addReg (Swp2).addReg (Mask2);
2266
+
2267
+ BuildMI (BB, DL, TII.get (Xtensa::BNE))
2268
+ .addReg (AtomValLoop)
2269
+ .addReg (AtomValPhi)
2270
+ .addMBB (BBLoop1);
2271
+
2272
+ BB = BBExit;
2273
+
2274
+ auto St = BB->begin ();
2275
+
2276
+ unsigned R8 = MRI.createVirtualRegister (RC);
2277
+
2278
+ BuildMI (*BB, St, DL, TII.get (Xtensa::SSR)).addReg (BitOffs);
2279
+ BuildMI (*BB, St, DL, TII.get (Xtensa::SLL), R8).addReg (AtomValLoop);
2280
+
2281
+ if (isByteOperand) {
2282
+ BuildMI (*BB, St, DL, TII.get (Xtensa::SEXT), Res.getReg ())
2283
+ .addReg (R8)
2284
+ .addImm (7 );
2285
+ } else {
2286
+ BuildMI (*BB, St, DL, TII.get (Xtensa::SEXT), Res.getReg ())
2287
+ .addReg (R8)
2288
+ .addImm (15 );
2289
+ }
2290
+
2291
+ MI.eraseFromParent (); // The pseudo instruction is gone now.
2292
+ return BB;
2293
+ }
2294
+
2295
+ // Emit instructions for atomic_swap node for 32 bit operands
2296
+ MachineBasicBlock *
2297
+ XtensaTargetLowering::emitAtomicSwap (MachineInstr &MI,
2298
+ MachineBasicBlock *BB) const {
2299
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo ();
2300
+ DebugLoc DL = MI.getDebugLoc ();
2301
+
2302
+ const BasicBlock *LLVM_BB = BB->getBasicBlock ();
2303
+ MachineFunction::iterator It = ++BB->getIterator ();
2304
+
2305
+ MachineFunction *F = BB->getParent ();
2306
+ MachineBasicBlock *BBLoop = F->CreateMachineBasicBlock (LLVM_BB);
2307
+ MachineBasicBlock *BBExit = F->CreateMachineBasicBlock (LLVM_BB);
2308
+
2309
+ F->insert (It, BBLoop);
2310
+ F->insert (It, BBExit);
2311
+
2312
+ // Transfer the remainder of BB and its successor edges to BBExit.
2313
+ BBExit->splice (BBExit->begin (), BB,
2314
+ std::next (MachineBasicBlock::iterator (MI)), BB->end ());
2315
+ BBExit->transferSuccessorsAndUpdatePHIs (BB);
2316
+
2317
+ BB->addSuccessor (BBLoop);
2318
+ BBLoop->addSuccessor (BBLoop);
2319
+ BBLoop->addSuccessor (BBExit);
2320
+
2321
+ MachineOperand &Res = MI.getOperand (0 );
2322
+ MachineOperand &AtomValAddr = MI.getOperand (1 );
2323
+ MachineOperand &SwpVal = MI.getOperand (2 );
2324
+
2325
+ MachineFunction *MF = BB->getParent ();
2326
+ MachineRegisterInfo &MRI = MF->getRegInfo ();
2327
+ const TargetRegisterClass *RC = getRegClassFor (MVT::i32 );
2328
+
2329
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::MEMW));
2330
+
2331
+ unsigned AtomVal = MRI.createVirtualRegister (RC);
2332
+ BuildMI (*BB, MI, DL, TII.get (Xtensa::L32I), AtomVal)
2333
+ .addReg (AtomValAddr.getReg ())
2334
+ .addImm (0 );
2335
+
2336
+ unsigned AtomValLoop = MRI.createVirtualRegister (RC);
2337
+
2338
+ BuildMI (*BBLoop, BBLoop->begin (), DL, TII.get (Xtensa::PHI), Res.getReg ())
2339
+ .addReg (AtomValLoop)
2340
+ .addMBB (BBLoop)
2341
+ .addReg (AtomVal)
2342
+ .addMBB (BB);
2343
+
2344
+ BB = BBLoop;
2345
+
2346
+ BuildMI (BB, DL, TII.get (Xtensa::WSR), Xtensa::SCOMPARE1).addReg (Res.getReg ());
2347
+
2348
+ BuildMI (BB, DL, TII.get (Xtensa::S32C1I), AtomValLoop)
2349
+ .addReg (SwpVal.getReg ())
2350
+ .addReg (AtomValAddr.getReg ())
2351
+ .addImm (0 );
2352
+
2353
+ BuildMI (BB, DL, TII.get (Xtensa::BNE))
2354
+ .addReg (AtomValLoop)
2355
+ .addReg (Res.getReg ())
2356
+ .addMBB (BBLoop);
2357
+
2358
+ MI.eraseFromParent (); // The pseudo instruction is gone now.
2359
+ return BB;
2360
+ }
2361
+
2099
2362
MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter (
2100
2363
MachineInstr &MI, MachineBasicBlock *MBB) const {
2101
2364
const TargetInstrInfo &TII = *Subtarget.getInstrInfo ();
@@ -2198,6 +2461,18 @@ MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter(
2198
2461
return MBB;
2199
2462
}
2200
2463
2464
+ case Xtensa::ATOMIC_SWAP_8_P: {
2465
+ return emitAtomicSwap (MI, MBB, 1 );
2466
+ }
2467
+
2468
+ case Xtensa::ATOMIC_SWAP_16_P: {
2469
+ return emitAtomicSwap (MI, MBB, 0 );
2470
+ }
2471
+
2472
+ case Xtensa::ATOMIC_SWAP_32_P: {
2473
+ return emitAtomicSwap (MI, MBB);
2474
+ }
2475
+
2201
2476
case Xtensa::S8I:
2202
2477
case Xtensa::S16I:
2203
2478
case Xtensa::S32I:
0 commit comments