Skip to content

Commit 1c96bcd

Browse files
aparshin-intel authored and sys_zuul committed
vc i64 emulation should convert partial predicates to icmp
Change-Id: Ie1486b0bf3a0b4506dff38f1e53b16c847009542
1 parent b3a0f63 commit 1c96bcd

File tree

1 file changed

+60
-24
lines changed

1 file changed

+60
-24
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXEmulate.cpp

Lines changed: 60 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ namespace {
6464
static cl::opt<bool> OptIcmpEnable("genx-i64emu-icmp-enable", cl::init(true),
6565
cl::Hidden,
6666
cl::desc("enable icmp emulation"));
67+
static cl::opt<bool> OptConvertPartialPredicates(
68+
"genx-i64emu-icmp-ppred-lowering", cl::init(true), cl::Hidden,
69+
cl::desc("if \"partial predicates\" shall be converted to icmp"));
70+
6771
using IRBuilder = IRBuilder<TargetFolder>;
6872

6973
class GenXEmulate : public ModulePass {
@@ -205,10 +209,8 @@ class GenXEmulate : public ModulePass {
205209
static AddSubExtResult buildSubb(Module *M, IRBuilder &B, Value &L,
206210
Value &R, const Twine &Prefix);
207211
static Value *buildGeneralICmp(IRBuilder &B, CmpInst::Predicate P,
208-
const LHSplit &L, const LHSplit &R);
209-
static Value *buildICmpEQ(IRBuilder &B, const LHSplit &L, const LHSplit &R);
210-
static Value *buildICmpNE(IRBuilder &B, const LHSplit &L, const LHSplit &R);
211-
212+
bool IsPartialPredicate, const LHSplit &L,
213+
const LHSplit &R);
212214
static Value *tryOptimizedShr(IRBuilder &B, IVSplitter &SplitBuilder,
213215
BinaryOperator &Op, ArrayRef<uint32_t> Sa);
214216
static Value *tryOptimizedShl(IRBuilder &B, IVSplitter &SplitBuilder,
@@ -462,7 +464,14 @@ Value *GenXEmulate::Emu64Expander::visitICmp(ICmpInst &Cmp) {
462464
auto Src0 = Splitter.splitOperandLoHi(0);
463465
auto Src1 = Splitter.splitOperandLoHi(1);
464466

465-
Value *Result = buildGeneralICmp(Builder, Cmp.getPredicate(), Src0, Src1);
467+
bool PartialPredicate =
468+
std::any_of(Cmp.user_begin(), Cmp.user_end(), [](const User *U) {
469+
auto IID = GenXIntrinsic::getAnyIntrinsicID(U);
470+
return IID == GenXIntrinsic::genx_wrpredregion ||
471+
IID == GenXIntrinsic::genx_wrpredpredregion;
472+
});
473+
Value *Result = buildGeneralICmp(Builder, Cmp.getPredicate(),
474+
PartialPredicate, Src0, Src1);
466475

467476
if (Cmp.getType()->isIntegerTy() && !Result->getType()->isIntegerTy()) {
468477
// we expect this cast to be possible
@@ -965,20 +974,6 @@ Value *GenXEmulate::Emu64Expander::buildTernaryAddition(
965974
auto *SubH = Builder.CreateAdd(&A, &B, Name + ".part");
966975
return Builder.CreateAdd(SubH, &C, Name);
967976
}
968-
Value *GenXEmulate::Emu64Expander::buildICmpEQ(IRBuilder &Builder,
969-
const LHSplit &Src0,
970-
const LHSplit &Src1) {
971-
auto *T0 = Builder.CreateICmpEQ(Src0.Lo, Src1.Lo);
972-
auto *T1 = Builder.CreateICmpEQ(Src0.Hi, Src1.Hi);
973-
return Builder.CreateAnd(T0, T1, "emulated_icmp_eq");
974-
}
975-
Value *GenXEmulate::Emu64Expander::buildICmpNE(IRBuilder &Builder,
976-
const LHSplit &Src0,
977-
const LHSplit &Src1) {
978-
auto *T0 = Builder.CreateICmpNE(Src0.Lo, Src1.Lo);
979-
auto *T1 = Builder.CreateICmpNE(Src0.Hi, Src1.Hi);
980-
return Builder.CreateOr(T1, T0, "emulated_icmp_ne");
981-
}
982977
GenXEmulate::Emu64Expander::AddSubExtResult
983978
GenXEmulate::Emu64Expander::buildAddc(Module *M, IRBuilder &Builder, Value &L,
984979
Value &R, const Twine &Prefix) {
@@ -1016,6 +1011,7 @@ GenXEmulate::Emu64Expander::buildSubb(Module *M, IRBuilder &Builder, Value &L,
10161011
}
10171012
Value *GenXEmulate::Emu64Expander::buildGeneralICmp(IRBuilder &Builder,
10181013
CmpInst::Predicate P,
1014+
bool IsPartialPredicate,
10191015
const LHSplit &Src0,
10201016
const LHSplit &Src1) {
10211017

@@ -1057,21 +1053,61 @@ Value *GenXEmulate::Emu64Expander::buildGeneralICmp(IRBuilder &Builder,
10571053
}
10581054
};
10591055

1056+
std::pair<Value *, Value *> ResultParts = {};
10601057
switch (P) {
1061-
case CmpInst::ICMP_EQ:
1062-
return buildICmpEQ(Builder, Src0, Src1);
1063-
case CmpInst::ICMP_NE:
1064-
return buildICmpNE(Builder, Src0, Src1);
1058+
case CmpInst::ICMP_EQ: {
1059+
auto *T0 = Builder.CreateICmpEQ(Src0.Lo, Src1.Lo);
1060+
auto *T1 = Builder.CreateICmpEQ(Src0.Hi, Src1.Hi);
1061+
ResultParts = {T0, T1};
1062+
break;
1063+
}
1064+
case CmpInst::ICMP_NE: {
1065+
auto *T0 = Builder.CreateICmpNE(Src0.Lo, Src1.Lo);
1066+
auto *T1 = Builder.CreateICmpNE(Src0.Hi, Src1.Hi);
1067+
ResultParts = {T0, T1};
1068+
break;
1069+
}
10651070
default: {
10661071
CmpInst::Predicate EmuP1 = getEmulateCond1(P);
10671072
CmpInst::Predicate EmuP2 = getEmulateCond2(P);
10681073
auto *T0 = Builder.CreateICmp(EmuP1, Src0.Lo, Src1.Lo);
10691074
auto *T1 = Builder.CreateICmpEQ(Src0.Hi, Src1.Hi);
10701075
auto *T2 = Builder.CreateAnd(T1, T0);
10711076
auto *T3 = Builder.CreateICmp(EmuP2, Src0.Hi, Src1.Hi);
1072-
return Builder.CreateOr(T2, T3, "int_emu." + CmpInst::getPredicateName(P));
1077+
ResultParts = {T2, T3};
1078+
break;
1079+
}
10731080
}
1081+
auto ResultCond = (P == CmpInst::ICMP_EQ) ? Instruction::BinaryOps::And
1082+
: Instruction::BinaryOps::Or;
1083+
if (!IsPartialPredicate || !OptConvertPartialPredicates) {
1084+
return Builder.CreateBinOp(
1085+
ResultCond, ResultParts.first, ResultParts.second,
1086+
"int_emu.cmp." + CmpInst::getPredicateName(P) + ".");
10741087
}
1088+
// Note:
1089+
// The reason for doing this conversion is that our backend has no
1090+
// convenient way to represent partial updates of predicates with anything
1091+
// except for icmp instructions. With the current codebase
1092+
// we are unable to create a proper visa for the following case ("pseudo" IR):
1093+
// bale {
1094+
// %ne1 = or <8 x i1> %a, %b
1095+
// %j = call <16 x i1> wrpredregion(<16 x i1> undef, <8 x i1> %ne1, i32 0)
1096+
// }
1097+
// bale {
1098+
// %ne2 = or <8 x i1> %c, %d
1099+
// %joined = call <16 x i1> wrpredregion(<16 x i1> %j, <8 x i1> %ne2, i32 8)
1100+
// }
1101+
// As such we convert such cases to the following sequence: 2xsel->or->cmp
1102+
ConstantEmitter K(Src0.Lo);
1103+
auto *L = Builder.CreateSelect(ResultParts.first, K.getOnes(), K.getZero());
1104+
auto *R = Builder.CreateSelect(ResultParts.second, K.getOnes(), K.getZero());
1105+
auto *IPred = Builder.CreateBinOp(ResultCond, L, R,
1106+
"int_emu.cmp.part.int." +
1107+
CmpInst::getPredicateName(P) + ".");
1108+
return Builder.CreateICmpEQ(IPred, K.getOnes(),
1109+
"int_emu.cmp.part.i1" +
1110+
CmpInst::getPredicateName(P) + ".");
10751111
}
10761112
Value *GenXEmulate::Emu64Expander::buildRightShift(IVSplitter &SplitBuilder,
10771113
BinaryOperator &Op) {

0 commit comments

Comments
 (0)