@@ -3690,7 +3690,7 @@ bool Optimizer::foldPseudoNot(G4_BB *bb, INST_LIST_ITER &iter) {
3690
3690
}
3691
3691
3692
3692
/* **
3693
- this function optimizes the following cases:
3693
+ this function optimizes the following cases:
3694
3694
3695
3695
case 1:
3696
3696
cmp.gt.P0 s0 s1
@@ -3723,7 +3723,7 @@ mov (1) P0 Imm (NoMask)
3723
3723
smov (8) r[A0, 0] src0 src1 Imm
3724
3724
3725
3725
case 5:
3726
- pseudo_not (1) P2 P1
3726
+ pseudo_not (1) P2 P1
3727
3727
and (1) P4 P3 P2
3728
3728
==>
3729
3729
and (1) P4 P3 ~P1
@@ -3818,7 +3818,7 @@ void Optimizer::optimizeLogicOperation() {
3818
3818
merged = foldPseudoAndOr (bb, ii);
3819
3819
}
3820
3820
3821
- // translate the pseudo op
3821
+ // translate the pseudo op
3822
3822
if (!merged) {
3823
3823
expandPseudoLogic (builder, bb, ii);
3824
3824
}
@@ -3835,9 +3835,7 @@ bool Optimizer::foldPseudoAndOr(G4_BB *bb, INST_LIST_ITER &ii) {
3835
3835
3836
3836
// optimization should apply even when the dst of the pseudo-and/pseudo-or is
3837
3837
// global, since we are just hoisting it up, and WAR/WAW checks should be
3838
- // performed as we search for the src0 and src1 inst. Also need to check if
3839
- // the mask option of the pseudo-and/pseudo-or matches with the options of
3840
- // the defining instructions when dst is global.
3838
+ // performed as we search for the src0 and src1 inst.
3841
3839
3842
3840
G4_INST *inst = *ii;
3843
3841
// look for def of srcs
@@ -3854,7 +3852,7 @@ bool Optimizer::foldPseudoAndOr(G4_BB *bb, INST_LIST_ITER &ii) {
3854
3852
3855
3853
The new code uses defInstList directly, and aborts if there are more than
3856
3854
two definitions. Which means there is more than one instruction writing to
3857
- source. Disadvantage of that is that it is less precise . For example if we
3855
+ source. Disadvantage of that is that it is less precise. For example if we
3858
3856
are folding in to closest definition then before it was OK, but now will be
3859
3857
disallowed.
3860
3858
*/
@@ -3891,13 +3889,13 @@ bool Optimizer::foldPseudoAndOr(G4_BB *bb, INST_LIST_ITER &ii) {
3891
3889
std::swap (defInstructions[0 ], defInstructions[1 ]);
3892
3890
std::swap (maxSrc1, maxSrc2);
3893
3891
}
3894
- // Doing backward scan until earliest src to make sure dst of and/or is not
3892
+ // Doing backward scan until earliest src to make sure dst of and/or is not
3895
3893
// being written to or being read
3896
3894
/*
3897
3895
handling case like in spmv_csr
3898
- cmp.lt (M1, 1) P15 V40(0,0)<0;1,0> 0x10:w /// $191
3899
- cmp.lt (M1, 1) P16 V110(0,0)<0;1,0> V34(0,0)<0;1,0> /// $192
3900
- and (M1, 1) P16 P16 P15 /// $193
3896
+ cmp.lt (M1, 1) P15 V40(0,0)<0;1,0> 0x10:w /// $191
3897
+ cmp.lt (M1, 1) P16 V110(0,0)<0;1,0> V34(0,0)<0;1,0> /// $192
3898
+ and (M1, 1) P16 P16 P15 /// $193
3901
3899
*/
3902
3900
if (chkBwdOutputHazard (defInstructions[1 ], ii, defInstructions[0 ])) {
3903
3901
return false ;
@@ -3952,13 +3950,6 @@ bool Optimizer::foldPseudoAndOr(G4_BB *bb, INST_LIST_ITER &ii) {
3952
3950
return false ;
3953
3951
}
3954
3952
3955
- // Check if mask options are mismatched between the pseudo-and/pseudo-or and
3956
- // its defining instructions.
3957
- if ((inst->getMaskOption () != src0DefInst->getMaskOption () ||
3958
- inst->getMaskOption () != src1DefInst->getMaskOption ()) &&
3959
- fg.globalOpndHT .isOpndGlobal (inst->getDst ()))
3960
- return false ;
3961
-
3962
3953
// do the case 3 optimization
3963
3954
3964
3955
G4_PredState ps =
0 commit comments