Skip to content

Commit 2e455a9

Browse files
authored
Re-apply multiple fixes (#16602)
Re-apply multiple fixes: c8f734d Fix a warning issue related to the overloaded struct; 77fb673 GEP canon is on only for OCL; 8d12a0f avoid y*(1/x) for double precision type; fbf1aa9 [IGC VC] GenXVerify pass, initial; 0ae6dfb SetMaxRegForThreadDispatch was hardcoded for up to 128 GRFs; 932eafa Minor update to DisableRecompilation regkey description; bc3034f Fix bugs and update the LIT test for linear scan RA; 45f1295 Enable GRF read delay of send stall instructions; 7c95f49 [Autobackout][FuncReg] Revert of change: 3f0c186; 620c74c Fix a few register regioning issues for 64b instructions on MTL platform
1 parent 7ed14f4 commit 2e455a9

25 files changed

+830
-57
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5418,7 +5418,7 @@ namespace IGC
54185418
context->type == ShaderType::GEOMETRY_SHADER || context->type == ShaderType::HULL_SHADER)
54195419
{
54205420
unsigned maxReg = m_program->GetMaxRegForThreadDispatch();
5421-
V(vKernel->AddKernelAttribute("MaxRegThreadDispatch", 1, &maxReg));
5421+
V(vKernel->AddKernelAttribute("MaxRegThreadDispatch", sizeof(maxReg), &maxReg));
54225422
}
54235423
}
54245424

IGC/Compiler/CISACodeGen/CoalescingEngine.cpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -251,8 +251,8 @@ namespace IGC
251251
SetCurrentPart(tupleGeneratingInstruction, numPart);
252252
const uint numOperands = GetNumPayloadElements(tupleGeneratingInstruction);
253253
bool isAnyNodeAnchored = false, isAnyNodeCoalescable = false;
254-
SmallPtrSet<CCTuple*, 8> touchedTuplesSet;
255-
SmallVector<CCTuple*, 8> touchedTuples;
254+
SmallPtrSet<CCTuple*, MaxTouchedTuples> touchedTuplesSet;
255+
SmallVector<CCTuple*, MaxTouchedTuples> touchedTuples;
256256

257257
//Step: Prepare.
258258
PrepareTuple(
@@ -371,7 +371,7 @@ namespace IGC
371371
//provided that elements in dominatorsForDisplacement are displaced, and other nodes are attached.
372372
//If interferes is true, then no element will be attached to the ccTuple.
373373
if (!interferes) {
374-
SmallPtrSet<Value*, 8> touchedValuesSet;
374+
SmallPtrSet<Value*, MaxTouchedTuples> touchedValuesSet;
375375
for (uint i = 0; i < numOperands; i++) {
376376
Value* val = GetPayloadElementToValueMapping(tupleGeneratingInstruction, i);
377377

@@ -575,8 +575,8 @@ namespace IGC
575575
void CoalescingEngine::PrepareTuple(
576576
const uint numOperands,
577577
Instruction* tupleGeneratingInstruction,
578-
SmallPtrSet<CCTuple*, 8> & touchedTuplesSet,
579-
SmallVector<CCTuple*, 8> & touchedTuples,
578+
SmallPtrSet<CCTuple*, MaxTouchedTuples> & touchedTuplesSet,
579+
SmallVector<CCTuple*, MaxTouchedTuples> & touchedTuples,
580580
bool& isAnyNodeAnchored,
581581
bool& isAnyNodeCoalescable)
582582
{
@@ -843,7 +843,7 @@ namespace IGC
843843
CCTuple* ccTuple,
844844
ElementFunctor* functor)
845845
{
846-
SmallPtrSet<Value*, 8> touchedValuesSet;
846+
SmallPtrSet<Value*, MaxTouchedTuples> touchedValuesSet;
847847

848848
for (uint i = 0; i < numOperands; i++) {
849849
functor->SetIndex(i);
@@ -1001,7 +1001,7 @@ namespace IGC
10011001
CCTuple* ccTuple,
10021002
ProcessInterferencesElementFunctor* interferencesFunctor)
10031003
{
1004-
SmallPtrSet<Value*, 8> touchedValuesSet;
1004+
SmallPtrSet<Value*, MaxTouchedTuples> touchedValuesSet;
10051005
GatherWeightElementFunctor gatherFunctor;
10061006
ProcessElements(numOperands, tupleInst, offsetDiff, ccTuple, &gatherFunctor);
10071007
bool forceEviction =
@@ -1071,7 +1071,7 @@ namespace IGC
10711071

10721072
if (ccTuple)
10731073
{
1074-
SmallPtrSet<Value*, 8> touchedValuesSet;
1074+
SmallPtrSet<Value*, MaxTouchedTuples> touchedValuesSet;
10751075

10761076
//index = 0;
10771077
payloadCovered = true;
@@ -1179,7 +1179,7 @@ namespace IGC
11791179
}
11801180

11811181
if (payloadCovered) {
1182-
SmallPtrSet<Value*, 8> touchedValuesSet;
1182+
SmallPtrSet<Value*, MaxTouchedTuples> touchedValuesSet;
11831183

11841184
for (uint index = 0; index < numOperands; index++)
11851185
{

IGC/Compiler/CISACodeGen/CoalescingEngine.hpp

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,8 @@ namespace IGC {
3434
class CoalescingEngine : public llvm::FunctionPass, public llvm::InstVisitor<CoalescingEngine>
3535
{
3636
//TODO: this is fixed for now, but once we have pressure heuristic, could be relaxed
37-
static const int MaxTupleSize = 12;
37+
static const int MaxTouchedTuples = 8;
38+
static const int MaxTupleSize = MaxTouchedTuples + 4;
3839

3940
public:
4041
static char ID; // Pass identification, replacement for typeid
@@ -327,7 +328,7 @@ namespace IGC {
327328
else {
328329
uint numUsers = 0;
329330
{
330-
llvm::SmallPtrSet<llvm::User*, 8> touchedUsers;
331+
llvm::SmallPtrSet<llvm::User*, MaxTouchedTuples> touchedUsers;
331332
for (llvm::Value::user_iterator i = val->user_begin(), e = val->user_end(); i != e; ++i) {
332333
llvm::User* user = *i;
333334
if (llvm::isa<llvm::Instruction>(user)) {
@@ -388,8 +389,8 @@ namespace IGC {
388389
void PrepareTuple(
389390
const uint numOperands,
390391
llvm::Instruction* tupleGeneratingInstruction,
391-
llvm::SmallPtrSet<CCTuple*, 8> & touchedTuplesSet,
392-
llvm::SmallVector<CCTuple*, 8> & touchedTuples,
392+
llvm::SmallPtrSet<CCTuple*, MaxTouchedTuples> & touchedTuplesSet,
393+
llvm::SmallVector<CCTuple*, MaxTouchedTuples> & touchedTuples,
393394
bool& isAnyNodeAnchored,
394395
bool& isAnyNodeCoalescable);
395396

@@ -712,7 +713,7 @@ namespace IGC {
712713
CCTuple* m_ccTuple;
713714
CoalescingEngine* m_CE;
714715
int m_index;
715-
llvm::SmallPtrSet<llvm::Value*, 8> m_valuesForIsolation;
716+
llvm::SmallPtrSet<llvm::Value*, MaxTouchedTuples> m_valuesForIsolation;
716717

717718
public:
718719
ProcessInterferencesElementFunctor(
@@ -732,7 +733,7 @@ namespace IGC {
732733

733734
}
734735

735-
llvm::SmallPtrSet<llvm::Value*, 8> & GetComputedValuesForIsolation()
736+
llvm::SmallPtrSet<llvm::Value*, MaxTouchedTuples> & GetComputedValuesForIsolation()
736737
{
737738
return m_valuesForIsolation;
738739
}

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4089,7 +4089,10 @@ void EmitPass::BinaryUnary(llvm::Instruction* inst, const SSource source[2], con
40894089
break;
40904090
case Instruction::FDiv:
40914091
{
4092-
if (inst->getType()->isDoubleTy() && !inst->hasApproxFunc())
4092+
bool canUseFast = inst->hasApproxFunc() ||
4093+
(inst->hasAllowReciprocal() && !isOne(source[0].value));
4094+
4095+
if (inst->getType()->isDoubleTy() && !canUseFast)
40934096
{ // default : ieee fdiv
40944097
EmitSimpleAlu(llvm_ieee_divide, source, modifier);
40954098
}

IGC/Compiler/CISACodeGen/MemOpt.cpp

Lines changed: 21 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -321,6 +321,22 @@ namespace {
321321
return true;
322322
}
323323

324+
bool EnableCanonicalizeGEP() const {
325+
switch (IGC_GET_FLAG_VALUE(MemOptGEPCanon)) {
326+
case 1:
327+
return false;
328+
case 2:
329+
{
330+
if (CGC && CGC->type == ShaderType::OPENCL_SHADER)
331+
return false;
332+
break;
333+
}
334+
default:
335+
break;
336+
}
337+
return true;
338+
}
339+
324340
/// Canonicalize the calculation of 64-bit pointer by performing the
325341
/// following transformations to help SCEV to identify the constant offset
326342
/// between pointers.
@@ -526,7 +542,7 @@ bool MemOpt::runOnFunction(Function& F) {
526542
if (MemRefs.size() < 2)
527543
continue;
528544

529-
if (IGC_IS_FLAG_ENABLED(EnableMemOptGEPCanon)) {
545+
if (EnableCanonicalizeGEP()) {
530546
// Canonicalize 64-bit GEP to help SCEV find constant offset by
531547
// distributing `zext`/`sext` over safe expressions.
532548
for (auto& M : MemRefs)
@@ -553,7 +569,7 @@ bool MemOpt::runOnFunction(Function& F) {
553569
}
554570
}
555571

556-
if (IGC_IS_FLAG_ENABLED(EnableMemOptGEPCanon)) {
572+
if (EnableCanonicalizeGEP()) {
557573
// Optimize 64-bit GEP to reduce strength by factoring out `zext`/`sext`
558574
// over safe expressions.
559575
for (auto I : MemRefsToOptimize)
@@ -1137,7 +1153,7 @@ bool MemOpt::mergeLoad(LoadInst* LeadingLoad,
11371153
return false;
11381154
const SCEV* LeadingLastIdx = nullptr; // set on-demand
11391155
bool DoCmpOnLastIdx = false;
1140-
if (IGC_IS_FLAG_DISABLED(EnableMemOptGEPCanon)) {
1156+
if (!EnableCanonicalizeGEP()) {
11411157
auto aGEP = dyn_cast<GetElementPtrInst>(LeadingLoad->getPointerOperand());
11421158
if (aGEP && aGEP->hasIndices()) {
11431159
// index starts from 1
@@ -1305,8 +1321,8 @@ bool MemOpt::mergeLoad(LoadInst* LeadingLoad,
13051321
MaxElts = profitVec[k++];
13061322
}
13071323

1308-
if (IGC_IS_FLAG_ENABLED(EnableMemOptGEPCanon)) {
1309-
// Guard under the key to distinguish new code (EnableMemOptGEPCanon=0) from the old.
1324+
if (EnableCanonicalizeGEP()) {
1325+
// Guard under the key to distinguish new code (GEPCanon is off) from the old.
13101326
// Note: not sure about the reason for the following check.
13111327
if (NumElts == 3 && (LeadingLoadScalarType->isIntegerTy(16) || LeadingLoadScalarType->isHalfTy())) {
13121328
return false;

IGC/Compiler/CISACodeGen/PatternMatchPass.cpp

Lines changed: 74 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1185,6 +1185,9 @@ namespace IGC
11851185
MatchModifier(I);
11861186
break;
11871187
case Instruction::FMul:
1188+
match = MatchArcpFdiv(I) ||
1189+
MatchModifier(I);
1190+
break;
11881191
case Instruction::URem:
11891192
case Instruction::SRem:
11901193
case Instruction::FRem:
@@ -4886,6 +4889,77 @@ namespace IGC
48864889
return found;
48874890
}
48884891

4892+
bool CodeGenPatternMatch::MatchArcpFdiv(llvm::BinaryOperator& I)
4893+
{
4894+
4895+
using namespace llvm::PatternMatch;
4896+
4897+
struct ArcpFdivPattern : public Pattern
4898+
{
4899+
SSource sources[2];
4900+
virtual void Emit(EmitPass* pass, const DstModifier& modifier)
4901+
{
4902+
pass->FDiv(sources, modifier);
4903+
}
4904+
};
4905+
4906+
if (!I.getType()->isDoubleTy() || !I.hasAllowReciprocal())
4907+
return false;
4908+
4909+
// Look for fdiv.
4910+
Instruction* fdiv = nullptr;
4911+
Value* dividend = nullptr, * divisor = nullptr;
4912+
4913+
auto fdivPattern = m_OneUse(m_FDiv(m_FPOne(), m_Value(divisor)));
4914+
4915+
if (match(I.getOperand(0), fdivPattern))
4916+
{
4917+
fdiv = dyn_cast<Instruction>(I.getOperand(0));
4918+
dividend = I.getOperand(1);
4919+
}
4920+
else if (match(I.getOperand(1), fdivPattern))
4921+
{
4922+
fdiv = dyn_cast<Instruction>(I.getOperand(1));
4923+
dividend = I.getOperand(0);
4924+
}
4925+
4926+
if (!fdiv || !fdiv->hasAllowReciprocal())
4927+
return false;
4928+
4929+
// Pattern found.
4930+
ArcpFdivPattern* pattern = new (m_allocator)ArcpFdivPattern();
4931+
Value* sources[2] = { dividend, divisor };
4932+
e_modifier src_mod[2] = {};
4933+
4934+
if (FlushesDenormsOnInput(*fdiv))
4935+
{
4936+
sources[0] = SkipCanonicalize(sources[0]);
4937+
sources[1] = SkipCanonicalize(sources[1]);
4938+
}
4939+
4940+
GetModifier(*sources[0], src_mod[0], sources[0]);
4941+
GetModifier(*sources[1], src_mod[1], sources[1]);
4942+
4943+
pattern->sources[0] = GetSource(sources[0], src_mod[0], false, IsSourceOfSample(&I));
4944+
pattern->sources[1] = GetSource(sources[1], src_mod[1], false, IsSourceOfSample(&I));
4945+
4946+
// Try to add to constant pool whatever possible.
4947+
if (isCandidateForConstantPool(sources[0]))
4948+
{
4949+
AddToConstantPool(I.getParent(), sources[0]);
4950+
pattern->sources[0].fromConstantPool = true;
4951+
}
4952+
if (isCandidateForConstantPool(sources[1]))
4953+
{
4954+
AddToConstantPool(I.getParent(), sources[1]);
4955+
pattern->sources[1].fromConstantPool = true;
4956+
}
4957+
4958+
AddPattern(pattern);
4959+
4960+
return true;
4961+
}
4962+
48894963
bool CodeGenPatternMatch::MatchGradient(llvm::GenIntrinsicInst& I)
48904964
{
48914965
struct GradientPattern : public Pattern

IGC/Compiler/CISACodeGen/PatternMatchPass.hpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -225,6 +225,7 @@ namespace IGC
225225
bool MatchDp4a(llvm::GenIntrinsicInst& I);
226226
bool MatchLogicAlu(llvm::BinaryOperator& I);
227227
bool MatchRsqrt(llvm::BinaryOperator& I);
228+
bool MatchArcpFdiv(llvm::BinaryOperator& I);
228229
bool MatchBlockReadWritePointer(llvm::GenIntrinsicInst& I);
229230
bool MatchGradient(llvm::GenIntrinsicInst& I);
230231
bool MatchSampleDerivative(llvm::GenIntrinsicInst& I);

IGC/Compiler/CustomSafeOptPass.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5073,6 +5073,7 @@ bool GenStrengthReduction::processInst(Instruction* Inst)
50735073
}
50745074
Inv = BinaryOperator::CreateFDiv(Src0, Src1, "", insertBefore);
50755075
Inv->setFastMathFlags(Inst->getFastMathFlags());
5076+
Inv->setDebugLoc(Inst->getDebugLoc());
50765077
}
50775078

50785079
Instruction* Mul = BinaryOperator::CreateFMul(I->getOperand(0), Inv, "", I);
@@ -5087,6 +5088,9 @@ bool GenStrengthReduction::processInst(Instruction* Inst)
50875088

50885089
if (!Inv)
50895090
{
5091+
if (Inst->getType()->isDoubleTy())
5092+
return false;
5093+
50905094
// Only a single use of 1 / Src1. Create Inv right before the use.
50915095
Inv = BinaryOperator::CreateFDiv(Src0, Src1, "", Inst);
50925096
Inv->setFastMathFlags(Inst->getFastMathFlags());

IGC/Compiler/Optimizer/SynchronizationObjectCoalescing.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1081,9 +1081,9 @@ bool SynchronizationObjectCoalescing::IsReturnOperation(const llvm::Instruction*
10811081
template<class... Ts> struct overloaded : Ts...
10821082
{
10831083
template<typename T>
1084-
overloaded<Ts...>& operator=(T&& lambda)
1084+
overloaded<Ts...>& operator=(const T& lambda)
10851085
{
1086-
((static_cast<Ts&>(*this) = std::forward<T&&>(lambda)), ...);
1086+
((static_cast<Ts&>(*this) = lambda), ...);
10871087
return *this;
10881088
}
10891089

0 commit comments

Comments
 (0)