Skip to content

Commit 5a81a55

Browse files
authored
[GISel] Explicitly disable BF16 tablegen patterns. (#124113)
We currently have an issue where bf16 patterns can be used to match fp16 types, as GISel does not know about the difference between the two. This patch explicitly disables them to make sure that they are never used. The opposite can also happen, where fp16 patterns are used for operators that should be bf16, so this also changes any operations with bf16 types to now cause a fallback to SDAG. The pass setup for GISel has been slightly adjusted to make sure that a verify pass does not get added between AMD-SDAG and SIFixSGPRCopiesPass, which otherwise can cause verifier issues when falling back.
1 parent c310b4e commit 5a81a55

33 files changed

+2072
-4085
lines changed

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -296,8 +296,21 @@ void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
296296
MachinePreds[Edge].push_back(NewPred);
297297
}
298298

299+
// Returns true if the scalar type of U's own type, or of any of U's operands,
// is bfloat.
static bool containsBF16Type(const User &U) {
300+
// BF16 cannot currently be represented by LLT. To avoid miscompiles, we
301+
// prevent any instructions using it. FIXME: This can be removed once LLT
302+
// supports bfloat.
303+
return U.getType()->getScalarType()->isBFloatTy() ||
304+
any_of(U.operands(), [](Value *V) {
305+
return V->getType()->getScalarType()->isBFloatTy();
306+
});
307+
}
308+
299309
bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
300310
MachineIRBuilder &MIRBuilder) {
311+
if (containsBF16Type(U))
312+
return false;
313+
301314
// Get or create a virtual register for each value.
302315
// Unless the value is a Constant => loadimm cst?
303316
// or inline constant each time?
@@ -317,6 +330,9 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
317330

318331
bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
319332
MachineIRBuilder &MIRBuilder) {
333+
if (containsBF16Type(U))
334+
return false;
335+
320336
Register Op0 = getOrCreateVReg(*U.getOperand(0));
321337
Register Res = getOrCreateVReg(U);
322338
uint32_t Flags = 0;
@@ -334,6 +350,9 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
334350

335351
bool IRTranslator::translateCompare(const User &U,
336352
MachineIRBuilder &MIRBuilder) {
353+
if (containsBF16Type(U))
354+
return false;
355+
337356
auto *CI = cast<CmpInst>(&U);
338357
Register Op0 = getOrCreateVReg(*U.getOperand(0));
339358
Register Op1 = getOrCreateVReg(*U.getOperand(1));
@@ -1553,8 +1572,7 @@ bool IRTranslator::translateBitCast(const User &U,
15531572

15541573
bool IRTranslator::translateCast(unsigned Opcode, const User &U,
15551574
MachineIRBuilder &MIRBuilder) {
1556-
if (U.getType()->getScalarType()->isBFloatTy() ||
1557-
U.getOperand(0)->getType()->getScalarType()->isBFloatTy())
1575+
if (containsBF16Type(U))
15581576
return false;
15591577

15601578
uint32_t Flags = 0;
@@ -2647,6 +2665,8 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
26472665

26482666
bool IRTranslator::translateInlineAsm(const CallBase &CB,
26492667
MachineIRBuilder &MIRBuilder) {
2668+
if (containsBF16Type(CB))
2669+
return false;
26502670

26512671
const InlineAsmLowering *ALI = MF->getSubtarget().getInlineAsmLowering();
26522672

@@ -2736,6 +2756,9 @@ bool IRTranslator::translateCallBase(const CallBase &CB,
27362756
}
27372757

27382758
bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
2759+
if (containsBF16Type(U))
2760+
return false;
2761+
27392762
const CallInst &CI = cast<CallInst>(U);
27402763
auto TII = MF->getTarget().getIntrinsicInfo();
27412764
const Function *F = CI.getCalledFunction();
@@ -3371,6 +3394,9 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
33713394

33723395
bool IRTranslator::translateAtomicRMW(const User &U,
33733396
MachineIRBuilder &MIRBuilder) {
3397+
if (containsBF16Type(U))
3398+
return false;
3399+
33743400
const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
33753401
auto Flags = TLI->getAtomicMemOperandFlags(I, *DL);
33763402

llvm/lib/CodeGen/TargetPassConfig.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1017,7 +1017,7 @@ bool TargetPassConfig::addCoreISelPasses() {
10171017
if (Selector != SelectorType::GlobalISel || !isGlobalISelAbortEnabled())
10181018
DebugifyIsSafe = false;
10191019

1020-
// Add instruction selector passes.
1020+
// Add instruction selector passes for global isel if enabled.
10211021
if (Selector == SelectorType::GlobalISel) {
10221022
SaveAndRestore SavedAddingMachinePasses(AddingMachinePasses, true);
10231023
if (addIRTranslator())
@@ -1043,15 +1043,14 @@ bool TargetPassConfig::addCoreISelPasses() {
10431043
// Pass to reset the MachineFunction if the ISel failed.
10441044
addPass(createResetMachineFunctionPass(
10451045
reportDiagnosticWhenGlobalISelFallback(), isGlobalISelAbortEnabled()));
1046+
}
10461047

1047-
// Provide a fallback path when we do not want to abort on
1048-
// not-yet-supported input.
1049-
if (!isGlobalISelAbortEnabled() && addInstSelector())
1048+
// Run the SDAG InstSelector, providing a fallback path when we do not want to
1049+
// abort on not-yet-supported input.
1050+
if (Selector != SelectorType::GlobalISel || !isGlobalISelAbortEnabled())
1051+
if (addInstSelector())
10501052
return true;
10511053

1052-
} else if (addInstSelector())
1053-
return true;
1054-
10551054
// Expand pseudo-instructions emitted by ISel. Don't run the verifier before
10561055
// FinalizeISel.
10571056
addPass(&FinalizeISelID);

0 commit comments

Comments
 (0)