@@ -24,10 +24,10 @@ SPDX-License-Identifier: MIT
24
24
#include " llvm/IR/Constants.h"
25
25
#include " llvm/IR/DerivedTypes.h"
26
26
#include " llvm/IR/Function.h"
27
+ #include " llvm/IR/IRBuilder.h"
27
28
#include " llvm/IR/Instructions.h"
28
29
#include " llvm/Support/Debug.h"
29
30
#include < unordered_map>
30
- #include " Probe/Assertion.h"
31
31
32
32
#include " llvmWrapper/IR/DerivedTypes.h"
33
33
#include " llvmWrapper/Support/TypeSize.h"
@@ -922,14 +922,63 @@ static Instruction* simplifyConstIndirectRegion(Instruction* Inst) {
922
922
return Inst;
923
923
}
924
924
925
- static Value *simplifyRegionWrite (Instruction *Inst) {
926
- IGC_ASSERT (GenXIntrinsic::isWrRegion (Inst));
927
- Value *NewVal = Inst->getOperand (GenXIntrinsic::GenXRegion::NewValueOperandNum);
925
+ // fold bitcast with wrregion:
926
+ // ==> %oldval.cast = bitcast(%oldval)
927
+ // %2 = bitcast(%1) %3 = wrregion(%oldval.cast, %1, ...)
928
+ // %3 = wrregion(%oldval, %2, ...) %2 = bitcast(%3)
929
+ // so it can be baled later.
930
+ static Value *simplifyBitCastWithRegionWrite (Instruction *WrR,
931
+ const DataLayout &DL,
932
+ const GenXSubtarget &ST) {
933
+ using namespace GenXIntrinsic ::GenXRegion;
934
+ IGC_ASSERT (GenXIntrinsic::isWrRegion (WrR));
935
+ Value *NewVal = WrR->getOperand (NewValueOperandNum);
936
+ auto *BCI = dyn_cast<BitCastInst>(NewVal);
937
+ if (!BCI)
938
+ return nullptr ;
939
+ if (WrR->hasOneUse () && GenXIntrinsic::isWritePredefReg (WrR->user_back ()))
940
+ return nullptr ;
941
+ auto *NewScalarTy = BCI->getSrcTy ()->getScalarType ();
942
+ // Do not change register category to predicate.
943
+ if (NewScalarTy->isIntegerTy (1 ))
944
+ return nullptr ;
945
+ auto *OldVal = WrR->getOperand (OldValueOperandNum);
946
+ if (GenXIntrinsic::isReadWritePredefReg (OldVal))
947
+ return nullptr ;
948
+ auto *NewVecTy = genx::changeVectorType (OldVal->getType (), NewScalarTy);
949
+ if (!NewVecTy)
950
+ return nullptr ;
951
+ Region R = makeRegionFromBaleInfo (WrR, BaleInfo ());
952
+ if (!R.changeElementType (NewScalarTy, &DL))
953
+ return nullptr ;
954
+ // Transformation is not profitable for 2D regions or if it will require
955
+ // legalization.
956
+ if (R.is2D () || R.NumElements > llvm::PowerOf2Floor (
957
+ genx::getExecSizeAllowedBits (WrR, &ST)))
958
+ return nullptr ;
959
+ IRBuilder<> IRB (WrR);
960
+ auto *OldValCast =
961
+ IRB.CreateBitCast (OldVal, NewVecTy, OldVal->getName () + " .cast" );
962
+ auto *NewWrR = R.createWrRegion (OldValCast, BCI->getOperand (0 ),
963
+ WrR->getName (), WrR, WrR->getDebugLoc ());
964
+ auto *NewBCI = IRB.CreateBitCast (NewWrR, WrR->getType (), BCI->getName ());
965
+ return NewBCI;
966
+ }
928
967
968
+ static Value *simplifyRegionWrite (Instruction *WrR) {
969
+ using namespace GenXIntrinsic ::GenXRegion;
970
+ IGC_ASSERT (GenXIntrinsic::isWrRegion (WrR));
971
+ Value *NewVal = WrR->getOperand (NewValueOperandNum);
972
+
973
+ // Replace C with B if R - whole region
974
+ // C = wrregion(A, B, R)
975
+ if (makeRegionFromBaleInfo (WrR, BaleInfo ()).isWhole (WrR->getType ()) &&
976
+ NewVal->getType () == WrR->getType ())
977
+ return NewVal;
929
978
// Replace C with A
930
979
// C = wrregion(A, undef, R)
931
980
if (isa<UndefValue>(NewVal))
932
- return Inst ->getOperand (GenXIntrinsic::GenXRegion:: OldValueOperandNum);
981
+ return WrR ->getOperand (OldValueOperandNum);
933
982
934
983
// When A and undef have the same type, replace C with A
935
984
// B = rdregion(A, R)
@@ -941,29 +990,67 @@ static Value *simplifyRegionWrite(Instruction *Inst) {
941
990
// C = wrregion(A, B, R)
942
991
//
943
992
if (GenXIntrinsic::isRdRegion (NewVal)) {
944
- Instruction *B = cast<Instruction>(NewVal);
945
- Region InnerR = makeRegionFromBaleInfo (B , BaleInfo ());
946
- Region OuterR = makeRegionFromBaleInfo (Inst , BaleInfo ());
993
+ Instruction *RdR = cast<Instruction>(NewVal);
994
+ Region InnerR = makeRegionFromBaleInfo (RdR , BaleInfo ());
995
+ Region OuterR = makeRegionFromBaleInfo (WrR , BaleInfo ());
947
996
if (OuterR != InnerR)
948
997
return nullptr ;
949
998
950
- auto OldValB = B ->getOperand (GenXIntrinsic::GenXRegion:: OldValueOperandNum);
951
- if (GenXIntrinsic::isReadPredefReg (OldValB ))
999
+ auto OldValRdR = RdR ->getOperand (OldValueOperandNum);
1000
+ if (GenXIntrinsic::isReadPredefReg (OldValRdR ))
952
1001
return nullptr ;
953
- auto OldValC = Inst ->getOperand (GenXIntrinsic::GenXRegion:: OldValueOperandNum);
954
- if ((isa<UndefValue>(OldValC ) &&
955
- OldValB ->getType () == OldValC ->getType ()) ||
956
- OldValB == OldValC )
957
- return OldValB ;
1002
+ auto OldValWrR = WrR ->getOperand (OldValueOperandNum);
1003
+ if ((isa<UndefValue>(OldValWrR ) &&
1004
+ OldValRdR ->getType () == OldValWrR ->getType ()) ||
1005
+ OldValRdR == OldValWrR )
1006
+ return OldValRdR ;
958
1007
}
959
-
960
1008
return nullptr ;
961
1009
}
962
1010
1011
+ // fold bitcast with rdregion:
1012
+ // %2 = rdregion(%1, ...) ==> %3 = bitcast(%1)
1013
+ // %3 = bitcast(%2) %2 = rdregion(%3, ...)
1014
+ // so it can be baled later.
1015
+ static Value *simplifyBitCastFromRegionRead (BitCastInst *BCI,
1016
+ const DataLayout &DL,
1017
+ const GenXSubtarget &ST) {
1018
+ using namespace GenXIntrinsic ::GenXRegion;
1019
+ Instruction *RdR = dyn_cast<Instruction>(BCI->getOperand (0 ));
1020
+ if (!RdR || !GenXIntrinsic::isRdRegion (RdR) || !RdR->hasOneUse ())
1021
+ return nullptr ;
1022
+ auto *OldVal = RdR->getOperand (OldValueOperandNum);
1023
+ if (GenXIntrinsic::isReadPredefReg (OldVal))
1024
+ return nullptr ;
1025
+ auto *NewScalarTy = BCI->getDestTy ()->getScalarType ();
1026
+ // Do not change register category to predicate.
1027
+ if (NewScalarTy->isIntegerTy (1 ))
1028
+ return nullptr ;
1029
+ auto *NewVecTy = genx::changeVectorType (OldVal->getType (), NewScalarTy);
1030
+ if (!NewVecTy)
1031
+ return nullptr ;
1032
+ Region R = makeRegionFromBaleInfo (RdR, BaleInfo ());
1033
+ if (!R.changeElementType (NewScalarTy, &DL))
1034
+ return nullptr ;
1035
+ // Transformation is not profitable for 2D regions or if it will require
1036
+ // legalization.
1037
+ if (R.is2D () || R.NumElements > llvm::PowerOf2Floor (
1038
+ genx::getExecSizeAllowedBits (RdR, &ST)))
1039
+ return nullptr ;
1040
+ auto *NewBCI =
1041
+ IRBuilder<>(BCI).CreateBitCast (OldVal, NewVecTy, BCI->getName ());
1042
+ auto *NewRdR =
1043
+ R.createRdRegion (NewBCI, RdR->getName (), BCI, RdR->getDebugLoc ());
1044
+ return NewRdR;
1045
+ }
1046
+
963
1047
static Value *simplifyRegionRead (Instruction *Inst) {
964
1048
IGC_ASSERT (GenXIntrinsic::isRdRegion (Inst));
965
1049
Value *Input = Inst->getOperand (GenXIntrinsic::GenXRegion::OldValueOperandNum);
966
- if (isa<UndefValue>(Input))
1050
+ if (makeRegionFromBaleInfo (Inst, BaleInfo ()).isWhole (Input->getType ()) &&
1051
+ Input->getType () == Inst->getType ())
1052
+ return Input;
1053
+ else if (isa<UndefValue>(Input))
967
1054
return UndefValue::get (Inst->getType ());
968
1055
else if (auto C = dyn_cast<Constant>(Input)) {
969
1056
if (auto Splat = C->getSplatValue ()) {
@@ -990,7 +1077,8 @@ static Value *simplifyRegionRead(Instruction *Inst) {
990
1077
}
991
1078
992
1079
// Simplify a region read or write.
993
- Value *llvm::genx::simplifyRegionInst (Instruction *Inst, const DataLayout *DL) {
1080
+ Value *llvm::genx::simplifyRegionInst (Instruction *Inst, const DataLayout *DL,
1081
+ const GenXSubtarget *ST) {
994
1082
if (Inst->use_empty ())
995
1083
return nullptr ;
996
1084
@@ -1013,11 +1101,17 @@ Value *llvm::genx::simplifyRegionInst(Instruction *Inst, const DataLayout *DL) {
1013
1101
if (Constant *C = ConstantFoldGenX (Inst, *DL))
1014
1102
return C;
1015
1103
1104
+ if (auto *BCI = dyn_cast<BitCastInst>(Inst); BCI && DL && ST)
1105
+ return simplifyBitCastFromRegionRead (BCI, *DL, *ST);
1016
1106
ID = GenXIntrinsic::getGenXIntrinsicID (Inst);
1017
1107
switch (ID) {
1018
1108
case GenXIntrinsic::genx_wrregionf:
1019
1109
case GenXIntrinsic::genx_wrregioni:
1020
- return simplifyRegionWrite (Inst);
1110
+ if (auto *Res = simplifyRegionWrite (Inst))
1111
+ return Res;
1112
+ if (DL && ST)
1113
+ return simplifyBitCastWithRegionWrite (Inst, *DL, *ST);
1114
+ break ;
1021
1115
case GenXIntrinsic::genx_rdregionf:
1022
1116
case GenXIntrinsic::genx_rdregioni:
1023
1117
return simplifyRegionRead (Inst);
@@ -1027,12 +1121,13 @@ Value *llvm::genx::simplifyRegionInst(Instruction *Inst, const DataLayout *DL) {
1027
1121
return nullptr ;
1028
1122
}
1029
1123
1030
- bool llvm::genx::simplifyRegionInsts (Function *F, const DataLayout *DL) {
1124
+ bool llvm::genx::simplifyRegionInsts (Function *F, const DataLayout *DL,
1125
+ const GenXSubtarget *ST) {
1031
1126
bool Changed = false ;
1032
1127
for (auto &BB : F->getBasicBlockList ()) {
1033
1128
for (auto I = BB.begin (); I != BB.end ();) {
1034
1129
Instruction *Inst = &*I++;
1035
- if (auto V = simplifyRegionInst (Inst, DL)) {
1130
+ if (auto V = simplifyRegionInst (Inst, DL, ST )) {
1036
1131
Inst->replaceAllUsesWith (V);
1037
1132
Inst->eraseFromParent ();
1038
1133
Changed = true ;
0 commit comments