19
19
#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
20
20
#include " MCTargetDesc/R600MCTargetDesc.h"
21
21
#include " R600RegisterInfo.h"
22
+ #include " SIISelLowering.h"
22
23
#include " SIMachineFunctionInfo.h"
23
24
#include " llvm/Analysis/UniformityAnalysis.h"
24
25
#include " llvm/Analysis/ValueTracking.h"
@@ -2263,6 +2264,34 @@ bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
2263
2264
return false ;
2264
2265
}
2265
2266
2267
+ static SDValue combineBallotPattern (SDValue VCMP, bool &Negate) {
2268
+ assert (VCMP->getOpcode () == AMDGPUISD::SETCC);
2269
+ // Special case for amdgcn.ballot:
2270
+ // %Cond = i1 (and/or combination of i1 ISD::SETCCs)
2271
+ // %VCMP = i(WaveSize) AMDGPUISD::SETCC (ext %Cond), 0, setne/seteq
2272
+ // =>
2273
+ // Use i1 %Cond value instead of i(WaveSize) %VCMP.
2274
+ // This is possible because divergent ISD::SETCC is selected as V_CMP and
2275
+ // Cond becomes a i(WaveSize) full mask value.
2276
+ // Note that ballot doesn't use SETEQ condition but its easy to support it
2277
+ // here for completeness, so in this case Negate is set true on return.
2278
+ auto VCMP_CC = cast<CondCodeSDNode>(VCMP.getOperand (2 ))->get ();
2279
+ auto *VCMP_CRHS = dyn_cast<ConstantSDNode>(VCMP.getOperand (1 ));
2280
+ if ((VCMP_CC == ISD::SETEQ || VCMP_CC == ISD::SETNE) && VCMP_CRHS &&
2281
+ VCMP_CRHS->isZero ()) {
2282
+
2283
+ auto Cond = VCMP.getOperand (0 );
2284
+ if (ISD::isExtOpcode (Cond->getOpcode ())) // Skip extension.
2285
+ Cond = Cond.getOperand (0 );
2286
+
2287
+ if (isBoolSGPR (Cond)) {
2288
+ Negate = VCMP_CC == ISD::SETEQ;
2289
+ return Cond;
2290
+ }
2291
+ }
2292
+ return SDValue ();
2293
+ }
2294
+
2266
2295
void AMDGPUDAGToDAGISel::SelectBRCOND (SDNode *N) {
2267
2296
SDValue Cond = N->getOperand (1 );
2268
2297
@@ -2276,11 +2305,50 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
2276
2305
const SIRegisterInfo *TRI = ST->getRegisterInfo ();
2277
2306
2278
2307
bool UseSCCBr = isCBranchSCC (N) && isUniformBr (N);
2279
- unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
2308
+ bool AndExec = !UseSCCBr;
2309
+ bool Negate = false ;
2310
+
2311
+ if (Cond.getOpcode () == ISD::SETCC &&
2312
+ Cond->getOperand (0 )->getOpcode () == AMDGPUISD::SETCC) {
2313
+ SDValue VCMP = Cond->getOperand (0 );
2314
+ auto CC = cast<CondCodeSDNode>(Cond->getOperand (2 ))->get ();
2315
+ auto *CRHS = dyn_cast<ConstantSDNode>(Cond->getOperand (1 ));
2316
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) && CRHS && CRHS->isZero () &&
2317
+ // TODO: make condition below an assert after fixing ballot bitwidth.
2318
+ VCMP.getValueType ().getSizeInBits () == ST->getWavefrontSize ()) {
2319
+ // %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
2320
+ // %C = i1 ISD::SETCC %VCMP, 0, setne/seteq
2321
+ // BRCOND i1 %C, %BB
2322
+ // =>
2323
+ // %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
2324
+ // VCC = COPY i(WaveSize) %VCMP
2325
+ // S_CBRANCH_VCCNZ/VCCZ %BB
2326
+ Negate = CC == ISD::SETEQ;
2327
+ bool NegatedBallot = false ;
2328
+ if (auto BallotCond = combineBallotPattern (VCMP, NegatedBallot)) {
2329
+ Cond = BallotCond;
2330
+ UseSCCBr = !BallotCond->isDivergent ();
2331
+ Negate = Negate ^ NegatedBallot;
2332
+ } else {
2333
+ // TODO: don't use SCC here assuming that AMDGPUISD::SETCC is always
2334
+ // selected as V_CMP, but this may change for uniform condition.
2335
+ Cond = VCMP;
2336
+ UseSCCBr = false ;
2337
+ }
2338
+ }
2339
+ // Cond is either V_CMP resulted from AMDGPUISD::SETCC or a combination of
2340
+ // V_CMPs resulted from ballot or ballot has uniform condition and SCC is
2341
+ // used.
2342
+ AndExec = false ;
2343
+ }
2344
+
2345
+ unsigned BrOp =
2346
+ UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
2347
+ : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
2280
2348
Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC ();
2281
2349
SDLoc SL (N);
2282
2350
2283
- if (!UseSCCBr ) {
2351
+ if (AndExec ) {
2284
2352
// This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
2285
2353
// analyzed what generates the vcc value, so we do not know whether vcc
2286
2354
// bits for disabled lanes are 0. Thus we need to mask out bits for
0 commit comments