@@ -13699,26 +13699,81 @@ void Optimizer::applyNoMaskWA()
13699
13699
// true: if "O" is flag and has assigned a physical flag. This physical reg
13700
13700
// is returned as (freg, fsreg):ty.
13701
13701
// false: otherwise
13702
+ //
13703
+ // Note this code mimics the logic of printRegVarOff() in G4_IR.cpp.
13704
+ //
13705
+ // For pred/condMod, "ty" is the actual size that this "O" accesses,
13706
+ // not the decl size of "O". For example,
13707
+ // cmp (16|M16) (eq)f0.0 ...
13708
+ // this func returns with f(0,0):UW, but "O" is of UD!
13702
13709
static bool getFlagRegAndSubreg(G4_Operand* O, uint32_t& freg, uint32_t& fsreg, G4_Type& ty)
13703
13710
{
13711
+ // flag:
13712
+ // reg no = base's ExRegNum()
13713
+ // subregoff = base's subregoff + Operand's subregoff (in UW)
13714
+ //
13715
+ // Type difference b/w base and operand is not considered here for flag as
13716
+ // the base's type is always UW. Operand's type can be UW/UD. If operand's type is UD,
13717
+ // its subregoff in UD must be 0, which is the same as one in UW. Therefore, simply
13718
+ // treat operand's subRegOff as in UW.
13719
+ uint32_t nSubFlag = (O->getRightBound() - O->getLeftBound() + 16) / 16;
13720
+ uint32_t subregoff = 0;
13721
+ if (O->isSrcRegRegion())
13722
+ {
13723
+ subregoff = O->asSrcRegRegion()->getSubRegOff();
13724
+ }
13725
+ else if (O->isDstRegRegion())
13726
+ {
13727
+ subregoff = O->asDstRegRegion()->getSubRegOff();
13728
+ }
13729
+ else if (O->isPredicate())
13730
+ {
13731
+ subregoff = O->asPredicate()->getSubRegOff();
13732
+ }
13733
+ else if (O->isCondMod())
13734
+ {
13735
+ subregoff = O->asCondMod()->getSubRegOff();
13736
+ }
13737
+
13704
13738
G4_VarBase* BVar = O->getBase();
13705
- uint32_t nelts = (O->getRightBound() - O->getLeftBound() + 16) / 16;
13706
- ty = (nelts == 1 ? Type_UW : Type_UD);
13739
+ ty = (nSubFlag == 1 ? Type_UW : Type_UD);
13707
13740
bool isValid = false;
13708
13741
if (BVar)
13709
13742
{
13710
13743
freg = BVar->ExRegNum(isValid);
13711
- fsreg = BVar->asRegVar()->getPhyRegOff();
13744
+ fsreg = BVar->asRegVar()->getPhyRegOff() + subregoff ;
13712
13745
}
13713
13746
return isValid;
13714
13747
}
13715
13748
13716
13749
private:
13717
- uint16_t getFlagBits(G4_Operand* O) {
13750
+ uint16_t getFlagBits(G4_Operand* O)
13751
+ {
13718
13752
uint32_t r, sr;
13719
13753
G4_Type t;
13720
13754
if (getFlagRegAndSubreg(O, r, sr, t))
13721
13755
{
13756
+ // For the following cases, getFlagRegAndSubreg() returns with r=1, sr=0, ty=UW.
13757
+ // But they really access f1.1. Thus, do adjustment to get the right flag bits!
13758
+ // cmp (16|M16) (eq)f1.0 ...
13759
+ // (f1.0) mov (16|M16) ....
13760
+ if ((O->isPredicate() || O->isCondMod()) && t == Type_UW)
13761
+ {
13762
+ // sanity check: subreg could be 1 only if rightBound < 16
13763
+ assert(sr == 0 || O->getRightBound() < 16);
13764
+
13765
+ if (O->getLeftBound() >= 16)
13766
+ {
13767
+ // typical cases like ones in comments above
13768
+ sr = 1;
13769
+ }
13770
+ else if (O->getRightBound() >= 16)
13771
+ {
13772
+ // cross two sub-flags (f1.0 and f1.1). Reset t to UD
13773
+ t = Type_UD;
13774
+ }
13775
+ }
13776
+
13722
13777
uint16_t bits = (t == Type_UD ? 0x3 : 0x1);
13723
13778
return (bits << (r * 2 + sr));
13724
13779
}
0 commit comments