Skip to content

Commit 8224b22

Browse files
jgu222 authored and igcbot committed
Fixed incorrect flag subreg calculation.
After RA, flag's subreg should be its decl's subregoff + flag operand's subregoff. Using decl's subregoff alone isn't correct.
1 parent a3af5c9 commit 8224b22

File tree

1 file changed

+59
-4
lines changed

1 file changed

+59
-4
lines changed

visa/Optimizer.cpp

Lines changed: 59 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13699,26 +13699,81 @@ void Optimizer::applyNoMaskWA()
1369913699
// true: if "O" is flag and has assigned a physical flag. This physical reg
1370013700
// is returned as (freg, fsreg):ty.
1370113701
// false: otherwise
13702+
//
13703+
// Note this code mimics the logic of printRegVarOff() in G4_IR.cpp.
13704+
//
13705+
// For pred/condMod, "ty" is the actual size that this "O" accesses,
13706+
// not the decl size of "O". For example,
13707+
// cmp (16|M16) (eq)f0.0 ...
13708+
// this func returns with f(0,0):UW, but "O" is of UD!
1370213709
static bool getFlagRegAndSubreg(G4_Operand* O, uint32_t& freg, uint32_t& fsreg, G4_Type& ty)
1370313710
{
13711+
// flag:
13712+
// reg no = base's ExRegNum()
13713+
// subregoff = base's subregoff + Operand's subregoff (in UW)
13714+
//
13715+
// Type difference b/w base and operand is not considered here for flag as
13716+
// the base's type is always UW. Operand's type can be UW/UD. If operand's type is UD,
13717+
// its subregoff in UD must be 0, which is the same as one in UW. Therefore, simply
13718+
// treat operand's subRegOff as in UW.
13719+
uint32_t nSubFlag = (O->getRightBound() - O->getLeftBound() + 16) / 16;
13720+
uint32_t subregoff = 0;
13721+
if (O->isSrcRegRegion())
13722+
{
13723+
subregoff = O->asSrcRegRegion()->getSubRegOff();
13724+
}
13725+
else if (O->isDstRegRegion())
13726+
{
13727+
subregoff = O->asDstRegRegion()->getSubRegOff();
13728+
}
13729+
else if (O->isPredicate())
13730+
{
13731+
subregoff = O->asPredicate()->getSubRegOff();
13732+
}
13733+
else if (O->isCondMod())
13734+
{
13735+
subregoff = O->asCondMod()->getSubRegOff();
13736+
}
13737+
1370413738
G4_VarBase* BVar = O->getBase();
13705-
uint32_t nelts = (O->getRightBound() - O->getLeftBound() + 16) / 16;
13706-
ty = (nelts == 1 ? Type_UW : Type_UD);
13739+
ty = (nSubFlag == 1 ? Type_UW : Type_UD);
1370713740
bool isValid = false;
1370813741
if (BVar)
1370913742
{
1371013743
freg = BVar->ExRegNum(isValid);
13711-
fsreg = BVar->asRegVar()->getPhyRegOff();
13744+
fsreg = BVar->asRegVar()->getPhyRegOff() + subregoff;
1371213745
}
1371313746
return isValid;
1371413747
}
1371513748

1371613749
private:
13717-
uint16_t getFlagBits(G4_Operand* O) {
13750+
uint16_t getFlagBits(G4_Operand* O)
13751+
{
1371813752
uint32_t r, sr;
1371913753
G4_Type t;
1372013754
if (getFlagRegAndSubreg(O, r, sr, t))
1372113755
{
13756+
// For the following cases, getFlagRegAndSubreg() returns with r=1, sr=0, ty=UW.
13757+
// But they really access f1.1. Thus, do adjustment to get the right flag bits!
13758+
// cmp (16|M16) (eq)f1.0 ...
13759+
// (f1.0) mov (16|M16) ....
13760+
if ((O->isPredicate() || O->isCondMod()) && t == Type_UW)
13761+
{
13762+
// sanity check: subreg could be 1 only if rightBound < 16
13763+
assert(sr == 0 || O->getRightBound() < 16);
13764+
13765+
if (O->getLeftBound() >= 16)
13766+
{
13767+
// typical cases like ones in comments above
13768+
sr = 1;
13769+
}
13770+
else if (O->getRightBound() >= 16)
13771+
{
13772+
// cross two sub-flags (f1.0 and f1.1). Reset t to UD
13773+
t = Type_UD;
13774+
}
13775+
}
13776+
1372213777
uint16_t bits = (t == Type_UD ? 0x3 : 0x1);
1372313778
return (bits << (r * 2 + sr));
1372413779
}

0 commit comments

Comments
 (0)