Skip to content

Commit 8a032d6

Browse files
anikaushikigcbot
authored and committed
Remove redundant a0 movs that set up extended descriptor for
sends Remove redundant a0 movs that set up the extended descriptor for send instructions. The optimization pass optimizes patterns such as `(W) mov (1|M0) a0.2<1>:ud r[a0.0]<0;1,0>:d {F@1}` `(f0.0) load.ugm.d32.a32.ca.ca (8|M0) r8:1 bss[a0.2][r4:1] {ExBSO,$0}` `(W) mov (1|M0) a0.2<1>:ud r[a0.0]<0;1,0>:d {F@1}` `(f0.0) load.ugm.d32.a32.ca.ca (8|M0) r10:1 bss[a0.2][r5:1] {ExBSO,$2}` `(W) mov (1|M0) a0.2<1>:ud r[a0.0]<0;1,0>:d {F@1}` `(f0.0) load.ugm.d32.a32.ca.ca (8|M0) r11:1 bss[a0.2][r6:1] {ExBSO,$3}` Notice that the repeated extended descriptor movs to a0.2 are redundant; a single mov is sufficient. With this pass, the code sequence becomes `(W) mov (1|M0) a0.2<1>:ud r[a0.0]<0;1,0>:d {F@1}` `(f0.0) load.ugm.d32.a32.ca.ca (8|M0) r8:1 bss[a0.2][r4:1] {ExBSO,$0}` `(f0.0) load.ugm.d32.a32.ca.ca (8|M0) r10:1 bss[a0.2][r5:1] {ExBSO,$2}` `(f0.0) load.ugm.d32.a32.ca.ca (8|M0) r11:1 bss[a0.2][r6:1] {ExBSO,$3}` This pass identifies and removes such redundant movs, and is executed before copy propagation and the comprehensive LVN pass.
1 parent 99d2a65 commit 8a032d6

File tree

3 files changed

+93
-19
lines changed

3 files changed

+93
-19
lines changed

visa/Optimizer.cpp

Lines changed: 89 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,8 @@ void Optimizer::initOptimizations() {
687687
OPT_INITIALIZE_PASS(initializePayload, vISA_InitPayload, TimerID::NUM_TIMERS);
688688
OPT_INITIALIZE_PASS(cleanupBindless, vISA_enableCleanupBindless,
689689
TimerID::OPTIMIZER);
690+
OPT_INITIALIZE_PASS(cleanupA0Movs, vISA_enableCleanupA0Movs,
691+
TimerID::OPTIMIZER);
690692
OPT_INITIALIZE_PASS(countGRFUsage, vISA_PrintRegUsage, TimerID::MISC_OPTS);
691693
OPT_INITIALIZE_PASS(changeMoveType, vISA_ChangeMoveType, TimerID::MISC_OPTS);
692694
OPT_INITIALIZE_PASS(accSubBeforeRA, vISA_accSubBeforeRA, TimerID::OPTIMIZER);
@@ -866,6 +868,8 @@ int Optimizer::optimization() {
866868

867869
runPass(PI_removePartialMovs);
868870

871+
runPass(PI_cleanupA0Movs);
872+
869873
// remove redundant movs and fold some other patterns
870874
runPass(PI_localCopyPropagation);
871875

@@ -2711,10 +2715,6 @@ void Optimizer::localCopyPropagation() {
27112715
src->asSrcRegRegion()->getAddrImm());
27122716
}
27132717
}
2714-
} else if (inst->hasOneUse()) {
2715-
new_src_opnd = src;
2716-
new_src_opnd->asSrcRegRegion()->setModifier(new_mod);
2717-
new_src_opnd->asSrcRegRegion()->setType(builder, propType);
27182718
} else {
27192719
new_src_opnd = builder.duplicateOperand(src);
27202720
new_src_opnd->asSrcRegRegion()->setModifier(new_mod);
@@ -4578,6 +4578,77 @@ G4_Operand *Optimizer::updateSendsHeaderReuse(
45784578
return nullptr;
45794579
}
45804580

4581+
//
// Remove redundant instructions that set up the extended message descriptor
// for send instructions. Each basic block is scanned with its own InstValues
// table: an instruction that writes a message-descriptor variable and whose
// only use is a send is looked up in the table. On a hit, the send is rewired
// to read the earlier instruction's destination and the current instruction
// is erased from the block; on a miss, the instruction is recorded.
//
void Optimizer::cleanupA0Movs() {
4582+
for (auto bb : fg) {
4583+
// the value table is rebuilt for every basic block
InstValues values(4);
4584+
// no increment in the loop header: iter is advanced either by erase() or by
// the ++iter at the bottom of the body
for (auto iter = bb->begin(), iterEnd = bb->end(); iter != iterEnd;) {
4585+
G4_INST *inst = *iter;
4586+
4587+
// true if inst writes a message-descriptor variable and has exactly one
// use, which is a send
auto isDstExtDesc = [](G4_INST *inst) {
4588+
G4_DstRegRegion *dst = inst->getDst();
4589+
if (dst && dst->getTopDcl() && dst->getTopDcl()->isMsgDesc()) {
4590+
// require exactly one use, and that use must be a send (the operand
// position within the send is not checked here)
4591+
if (inst->use_size() != 1) {
4592+
return false;
4593+
}
4594+
auto use = inst->use_front();
4595+
G4_INST *useInst = use.first;
4596+
if (useInst->isSend()) {
4597+
return true;
4598+
}
4599+
}
4600+
return false;
4601+
};
4602+
4603+
if (isDstExtDesc(inst)) {
4604+
G4_INST *valInst = values.findValue(inst);
4605+
if (valInst != nullptr) {
4606+
VISA_DEBUG_VERBOSE({
4607+
std::cout << "can replace \n";
4608+
inst->emit(std::cout);
4609+
std::cout << "\n with \n";
4610+
valInst->emit(std::cout);
4611+
std::cout << "\n";
4612+
});
4613+
for (auto I = inst->use_begin(), E = inst->use_end(); I != E; ++I) {
4614+
// each use is in the form of A0(0,0)<0;1,0>:ud in a send
4615+
G4_INST *useInst = I->first;
4616+
Gen4_Operand_Number num = I->second;
4617+
vISA_ASSERT(useInst->isSend(), "use inst must be a send");
4618+
// point the send at the destination base of the earlier instruction
G4_SrcRegRegion *newExDesc =
4619+
builder.createSrc(valInst->getDst()->getBase(), 0, 0,
4620+
builder.getRegionScalar(), Type_UD);
4621+
useInst->setSrc(newExDesc, useInst->getSrcNum(num));
4622+
}
4623+
// NOTE(review): presumably this detaches the redundant inst from its
// defs and hands its uses over to valInst before the erase -- confirm
// against G4_INST::removeAllDefs / transferUse
(*iter)->removeAllDefs();
4624+
(*iter)->transferUse(valInst);
4625+
// erase() yields the next iterator, so skip the ++iter below
iter = bb->erase(iter);
4626+
continue;
4627+
} else {
4628+
VISA_DEBUG_VERBOSE({
4629+
std::cout << "add new value:\n";
4630+
inst->emit(std::cout);
4631+
std::cout << "\n";
4632+
});
4633+
// this is necessary since for msg desc we always use the physical a0.0,
4634+
// so a new inst will invalidate the previous one
4635+
values.deleteValue(inst);
4636+
values.addValue(inst);
4637+
}
4638+
} else {
4639+
G4_DstRegRegion *dst = inst->getDst();
4640+
if (dst && dst->isDirectAddress()) {
4641+
// the address register is written for a non-extdesc purpose, so drop
// every tracked value
4642+
values.clear();
4643+
} else {
4644+
values.deleteValue(inst);
4645+
}
4646+
}
4647+
++iter;
4648+
}
4649+
}
4650+
}
4651+
45814652
//
45824653
// Perform value numbering on writes to the extended msg descriptor for bindless
45834654
// access of the form op (1) a0.2<1>:ud src0 src1 src2 {NoMask} and remove
@@ -4647,29 +4718,28 @@ void Optimizer::cleanupBindless() {
46474718
auto isDstExtDesc = [](G4_INST *inst) {
46484719
G4_DstRegRegion *dst = inst->getDst();
46494720
if (dst && dst->getTopDcl() && dst->getTopDcl()->isMsgDesc()) {
4650-
// check that its single use is at src3 of split send
4651-
if (inst->use_size() != 1) {
4652-
return false;
4653-
}
4654-
auto use = inst->use_front();
4655-
G4_INST *useInst = use.first;
4656-
if (useInst->isSend()) {
4657-
return true;
4721+
// if a use is something other than a send, do not perform the
4722+
// optimization
4723+
for (auto use = inst->use_begin(); use != inst->use_end(); use++) {
4724+
G4_INST* useInst = use->first;
4725+
if (!useInst->isSend())
4726+
return false;
46584727
}
4728+
return true;
46594729
}
46604730
return false;
46614731
};
46624732

46634733
if (isDstExtDesc(inst)) {
46644734
G4_INST *valInst = values.findValue(inst);
46654735
if (valInst != nullptr) {
4666-
#if 0
4667-
std::cout << "can replace \n";
4668-
inst->emit(std::cout);
4669-
std::cout << "\n with \n";
4670-
valInst->emit(std::cout);
4671-
std::cout << "\n";
4672-
#endif
4736+
VISA_DEBUG_VERBOSE({
4737+
std::cout << "can replace \n";
4738+
inst->emit(std::cout);
4739+
std::cout << "\n with \n";
4740+
valInst->emit(std::cout);
4741+
std::cout << "\n";
4742+
});
46734743
for (auto I = inst->use_begin(), E = inst->use_end(); I != E; ++I) {
46744744
// each use is in the form of A0(0,0)<0;1,0>:ud in a send
46754745
G4_INST *useInst = I->first;

visa/Optimizer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ class Optimizer {
210210
void hoistBarrierHeaderToTop(G4_SrcRegRegion *);
211211
/* end of member functions for message header opt */
212212
void cleanupBindless();
213+
void cleanupA0Movs();
213214
G4_Operand *updateSendsHeaderReuse(std::vector<std::vector<G4_INST *>> &,
214215
std::vector<G4_INST *> &, INST_LIST_ITER);
215216
void countGRFUsage();
@@ -374,6 +375,7 @@ class Optimizer {
374375
PI_createR0Copy,
375376
PI_initializePayload,
376377
PI_cleanupBindless,
378+
PI_cleanupA0Movs,
377379
PI_countGRFUsage,
378380
PI_changeMoveType,
379381
PI_accSubBeforeRA,

visa/include/VISAOptionsDefs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ DEF_VISA_OPTION(vISA_KeepScalarJmp, ET_BOOL, "-keepScalarJmp",
134134
"-keepScalarJmp: don't convert scalar jmp to goto", false)
135135
DEF_VISA_OPTION(vISA_enableCleanupBindless, ET_BOOL, "-cleanBindless", UNUSED,
136136
true)
137+
DEF_VISA_OPTION(vISA_enableCleanupA0Movs, ET_BOOL, "-cleanupA0Movs", UNUSED,
138+
true)
137139
DEF_VISA_OPTION(vISA_EnableSplitVariables, ET_BOOL, "-noSplitVariables", UNUSED,
138140
false)
139141
DEF_VISA_OPTION(vISA_ChangeMoveType, ET_BOOL, "-ALTMode", UNUSED, true)

0 commit comments

Comments
 (0)