@@ -64,6 +64,10 @@ namespace {
64
64
static cl::opt<bool > OptIcmpEnable (" genx-i64emu-icmp-enable" , cl::init(true ),
65
65
cl::Hidden,
66
66
cl::desc(" enable icmp emulation" ));
67
+ static cl::opt<bool > OptConvertPartialPredicates (
68
+ " genx-i64emu-icmp-ppred-lowering" , cl::init(true ), cl::Hidden,
69
+ cl::desc(" if \" partial predicates\" shall be converted to icmp" ));
70
+
67
71
using IRBuilder = IRBuilder<TargetFolder>;
68
72
69
73
class GenXEmulate : public ModulePass {
@@ -205,10 +209,8 @@ class GenXEmulate : public ModulePass {
205
209
static AddSubExtResult buildSubb (Module *M, IRBuilder &B, Value &L,
206
210
Value &R, const Twine &Prefix);
207
211
static Value *buildGeneralICmp (IRBuilder &B, CmpInst::Predicate P,
208
- const LHSplit &L, const LHSplit &R);
209
- static Value *buildICmpEQ (IRBuilder &B, const LHSplit &L, const LHSplit &R);
210
- static Value *buildICmpNE (IRBuilder &B, const LHSplit &L, const LHSplit &R);
211
-
212
+ bool IsPartialPredicate, const LHSplit &L,
213
+ const LHSplit &R);
212
214
static Value *tryOptimizedShr (IRBuilder &B, IVSplitter &SplitBuilder,
213
215
BinaryOperator &Op, ArrayRef<uint32_t > Sa);
214
216
static Value *tryOptimizedShl (IRBuilder &B, IVSplitter &SplitBuilder,
@@ -462,7 +464,14 @@ Value *GenXEmulate::Emu64Expander::visitICmp(ICmpInst &Cmp) {
462
464
auto Src0 = Splitter.splitOperandLoHi (0 );
463
465
auto Src1 = Splitter.splitOperandLoHi (1 );
464
466
465
- Value *Result = buildGeneralICmp (Builder, Cmp.getPredicate (), Src0, Src1);
467
+ bool PartialPredicate =
468
+ std::any_of (Cmp.user_begin (), Cmp.user_end (), [](const User *U) {
469
+ auto IID = GenXIntrinsic::getAnyIntrinsicID (U);
470
+ return IID == GenXIntrinsic::genx_wrpredregion ||
471
+ IID == GenXIntrinsic::genx_wrpredpredregion;
472
+ });
473
+ Value *Result = buildGeneralICmp (Builder, Cmp.getPredicate (),
474
+ PartialPredicate, Src0, Src1);
466
475
467
476
if (Cmp.getType ()->isIntegerTy () && !Result->getType ()->isIntegerTy ()) {
468
477
// we expect this cast to be possible
@@ -965,20 +974,6 @@ Value *GenXEmulate::Emu64Expander::buildTernaryAddition(
965
974
auto *SubH = Builder.CreateAdd (&A, &B, Name + " .part" );
966
975
return Builder.CreateAdd (SubH, &C, Name);
967
976
}
968
- Value *GenXEmulate::Emu64Expander::buildICmpEQ (IRBuilder &Builder,
969
- const LHSplit &Src0,
970
- const LHSplit &Src1) {
971
- auto *T0 = Builder.CreateICmpEQ (Src0.Lo , Src1.Lo );
972
- auto *T1 = Builder.CreateICmpEQ (Src0.Hi , Src1.Hi );
973
- return Builder.CreateAnd (T0, T1, " emulated_icmp_eq" );
974
- }
975
- Value *GenXEmulate::Emu64Expander::buildICmpNE (IRBuilder &Builder,
976
- const LHSplit &Src0,
977
- const LHSplit &Src1) {
978
- auto *T0 = Builder.CreateICmpNE (Src0.Lo , Src1.Lo );
979
- auto *T1 = Builder.CreateICmpNE (Src0.Hi , Src1.Hi );
980
- return Builder.CreateOr (T1, T0, " emulated_icmp_ne" );
981
- }
982
977
GenXEmulate::Emu64Expander::AddSubExtResult
983
978
GenXEmulate::Emu64Expander::buildAddc (Module *M, IRBuilder &Builder, Value &L,
984
979
Value &R, const Twine &Prefix) {
@@ -1016,6 +1011,7 @@ GenXEmulate::Emu64Expander::buildSubb(Module *M, IRBuilder &Builder, Value &L,
1016
1011
}
1017
1012
Value *GenXEmulate::Emu64Expander::buildGeneralICmp (IRBuilder &Builder,
1018
1013
CmpInst::Predicate P,
1014
+ bool IsPartialPredicate,
1019
1015
const LHSplit &Src0,
1020
1016
const LHSplit &Src1) {
1021
1017
@@ -1057,21 +1053,61 @@ Value *GenXEmulate::Emu64Expander::buildGeneralICmp(IRBuilder &Builder,
1057
1053
}
1058
1054
};
1059
1055
1056
+ std::pair<Value *, Value *> ResultParts = {};
1060
1057
switch (P) {
1061
- case CmpInst::ICMP_EQ:
1062
- return buildICmpEQ (Builder, Src0, Src1);
1063
- case CmpInst::ICMP_NE:
1064
- return buildICmpNE (Builder, Src0, Src1);
1058
+ case CmpInst::ICMP_EQ: {
1059
+ auto *T0 = Builder.CreateICmpEQ (Src0.Lo , Src1.Lo );
1060
+ auto *T1 = Builder.CreateICmpEQ (Src0.Hi , Src1.Hi );
1061
+ ResultParts = {T0, T1};
1062
+ break ;
1063
+ }
1064
+ case CmpInst::ICMP_NE: {
1065
+ auto *T0 = Builder.CreateICmpNE (Src0.Lo , Src1.Lo );
1066
+ auto *T1 = Builder.CreateICmpNE (Src0.Hi , Src1.Hi );
1067
+ ResultParts = {T0, T1};
1068
+ break ;
1069
+ }
1065
1070
default : {
1066
1071
CmpInst::Predicate EmuP1 = getEmulateCond1 (P);
1067
1072
CmpInst::Predicate EmuP2 = getEmulateCond2 (P);
1068
1073
auto *T0 = Builder.CreateICmp (EmuP1, Src0.Lo , Src1.Lo );
1069
1074
auto *T1 = Builder.CreateICmpEQ (Src0.Hi , Src1.Hi );
1070
1075
auto *T2 = Builder.CreateAnd (T1, T0);
1071
1076
auto *T3 = Builder.CreateICmp (EmuP2, Src0.Hi , Src1.Hi );
1072
- return Builder.CreateOr (T2, T3, " int_emu." + CmpInst::getPredicateName (P));
1077
+ ResultParts = {T2, T3};
1078
+ break ;
1079
+ }
1073
1080
}
1081
+ auto ResultCond = (P == CmpInst::ICMP_EQ) ? Instruction::BinaryOps::And
1082
+ : Instruction::BinaryOps::Or;
1083
+ if (!IsPartialPredicate || !OptConvertPartialPredicates) {
1084
+ return Builder.CreateBinOp (
1085
+ ResultCond, ResultParts.first , ResultParts.second ,
1086
+ " int_emu.cmp." + CmpInst::getPredicateName (P) + " ." );
1074
1087
}
1088
+ // Note:
1089
+ // The reason for doing this conversion is that our backend has no
1090
// convenient way to represent partial updates of predicates with anything
1091
// except for icmp instructions. With the current codebase
1092
+ // we are unable to create a proper visa for the following case ("pseudo" IR):
1093
+ // bale {
1094
+ // %ne1 = or <8 x i1> %a, %b
1095
+ // %j = call <16 x i1> wrpredregion(<16 x i1> undef, <8 x i1> %ne1, i32 0)
1096
+ // }
1097
+ // bale {
1098
+ // %ne2 = or <8 x i1> %c, %d
1099
// %joined = call <16 x i1> wrpredregion(<16 x i1> %j, <8 x i1> %ne2, i32 8)
1100
+ // }
1101
// As such we convert these cases to the following sequence: 2xsel->and/or->cmp
1102
+ ConstantEmitter K (Src0.Lo );
1103
+ auto *L = Builder.CreateSelect (ResultParts.first , K.getOnes (), K.getZero ());
1104
+ auto *R = Builder.CreateSelect (ResultParts.second , K.getOnes (), K.getZero ());
1105
+ auto *IPred = Builder.CreateBinOp (ResultCond, L, R,
1106
+ " int_emu.cmp.part.int." +
1107
+ CmpInst::getPredicateName (P) + " ." );
1108
+ return Builder.CreateICmpEQ (IPred, K.getOnes (),
1109
+ " int_emu.cmp.part.i1" +
1110
+ CmpInst::getPredicateName (P) + " ." );
1075
1111
}
1076
1112
Value *GenXEmulate::Emu64Expander::buildRightShift (IVSplitter &SplitBuilder,
1077
1113
BinaryOperator &Op) {
0 commit comments