Skip to content

Commit 62a6ab5

Browse files
committed
[LV] Support dropping the nneg flag for zext widen-cast recipes.
The compiler crashes when the assertion that checks that an instruction cannot produce poison is triggered for a zext nneg instruction. Changed the base class of the widen-cast recipe so that it can drop the nneg flag, which avoids the compiler crash.
1 parent 600c129 commit 62a6ab5

File tree

6 files changed

+150
-8
lines changed

6 files changed

+150
-8
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8578,8 +8578,8 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
85788578
}
85798579

85808580
if (auto *CI = dyn_cast<CastInst>(Instr)) {
8581-
return toVPRecipeResult(
8582-
new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(), CI));
8581+
return toVPRecipeResult(new VPWidenCastRecipe(CI->getOpcode(), Operands[0],
8582+
CI->getType(), *CI));
85838583
}
85848584

85858585
return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
829829
PossiblyExactOp,
830830
GEPOp,
831831
FPMathOp,
832+
NonNegOp,
832833
Other
833834
};
834835

@@ -847,6 +848,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
847848
struct GEPFlagsTy {
848849
char IsInBounds : 1;
849850
};
851+
struct NonNegFlagsTy {
852+
char NonNeg : 1;
853+
};
850854
struct FastMathFlagsTy {
851855
char AllowReassoc : 1;
852856
char NoNaNs : 1;
@@ -866,6 +870,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
866870
WrapFlagsTy WrapFlags;
867871
ExactFlagsTy ExactFlags;
868872
GEPFlagsTy GEPFlags;
873+
NonNegFlagsTy NonNegFlags;
869874
FastMathFlagsTy FMFs;
870875
unsigned AllFlags;
871876
};
@@ -893,6 +898,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
893898
} else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
894899
OpType = OperationType::GEPOp;
895900
GEPFlags.IsInBounds = GEP->isInBounds();
901+
} else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
902+
OpType = OperationType::NonNegOp;
903+
NonNegFlags.NonNeg = PNNI->hasNonNeg();
896904
} else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
897905
OpType = OperationType::FPMathOp;
898906
FMFs = Op->getFastMathFlags();
@@ -921,6 +929,7 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
921929
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
922930
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
923931
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
932+
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
924933
R->getVPDefID() == VPRecipeBase::VPReplicateSC;
925934
}
926935

@@ -943,6 +952,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
943952
FMFs.NoNaNs = false;
944953
FMFs.NoInfs = false;
945954
break;
955+
case OperationType::NonNegOp:
956+
NonNegFlags.NonNeg = false;
957+
break;
946958
case OperationType::Cmp:
947959
case OperationType::Other:
948960
break;
@@ -971,6 +983,9 @@ class VPRecipeWithIRFlags : public VPRecipeBase {
971983
I->setHasAllowContract(FMFs.AllowContract);
972984
I->setHasApproxFunc(FMFs.ApproxFunc);
973985
break;
986+
case OperationType::NonNegOp:
987+
I->setNonNeg(NonNegFlags.NonNeg);
988+
break;
974989
case OperationType::Cmp:
975990
case OperationType::Other:
976991
break;
@@ -1177,7 +1192,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags, public VPValue {
11771192
};
11781193

11791194
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1180-
class VPWidenCastRecipe : public VPRecipeBase, public VPValue {
1195+
class VPWidenCastRecipe : public VPRecipeWithIRFlags, public VPValue {
11811196
/// Cast instruction opcode.
11821197
Instruction::CastOps Opcode;
11831198

@@ -1186,15 +1201,19 @@ class VPWidenCastRecipe : public VPRecipeBase, public VPValue {
11861201

11871202
public:
11881203
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
1189-
CastInst *UI = nullptr)
1190-
: VPRecipeBase(VPDef::VPWidenCastSC, Op), VPValue(this, UI),
1204+
CastInst &UI)
1205+
: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPValue(this, &UI),
11911206
Opcode(Opcode), ResultTy(ResultTy) {
1192-
assert((!UI || UI->getOpcode() == Opcode) &&
1207+
assert(UI.getOpcode() == Opcode &&
11931208
"opcode of underlying cast doesn't match");
1194-
assert((!UI || UI->getType() == ResultTy) &&
1209+
assert(UI.getType() == ResultTy &&
11951210
"result type of underlying cast doesn't match");
11961211
}
11971212

1213+
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
1214+
: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), VPValue(this, nullptr),
1215+
Opcode(Opcode), ResultTy(ResultTy) {}
1216+
11981217
~VPWidenCastRecipe() override = default;
11991218

12001219
VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,10 @@ void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
652652
if (GEPFlags.IsInBounds)
653653
O << " inbounds";
654654
break;
655+
case OperationType::NonNegOp:
656+
if (NonNegFlags.NonNeg)
657+
O << " nneg";
658+
break;
655659
case OperationType::Other:
656660
break;
657661
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
8181
NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
8282
} else if (auto *CI = dyn_cast<CastInst>(Inst)) {
8383
NewRecipe = new VPWidenCastRecipe(
84-
CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI);
84+
CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
8585
} else {
8686
NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands());
8787
}

llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,84 @@ loop.exit:
268268
ret void
269269
}
270270

271+
define void @drop_zext_nneg(ptr noalias %p, ptr noalias %p1) #0 {
272+
; CHECK-LABEL: define void @drop_zext_nneg(
273+
; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[P1:%.*]]) #[[ATTR0:[0-9]+]] {
274+
; CHECK-NEXT: entry:
275+
; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
276+
; CHECK: vector.scevcheck:
277+
; CHECK-NEXT: br i1 true, label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
278+
; CHECK: vector.ph:
279+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
280+
; CHECK: vector.body:
281+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
282+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
283+
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[VEC_IND]], zeroinitializer
284+
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64>
285+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0
286+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[P]], i64 [[TMP2]]
287+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP3]], i32 0
288+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP4]], i32 8, <4 x i1> [[TMP0]], <4 x double> poison)
289+
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], <i1 true, i1 true, i1 true, i1 true>
290+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x double> zeroinitializer, <4 x double> [[WIDE_MASKED_LOAD]]
291+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[PREDPHI]], i32 3
292+
; CHECK-NEXT: store double [[TMP6]], ptr [[P1]], align 8
293+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
294+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
295+
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
296+
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
297+
; CHECK: middle.block:
298+
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
299+
; CHECK: scalar.ph:
300+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
301+
; CHECK-NEXT: br label [[BODY:%.*]]
302+
; CHECK: body:
303+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[NEXT:%.*]], [[ELSE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
304+
; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32
305+
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[TMP8]], 0
306+
; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE]]
307+
; CHECK: then:
308+
; CHECK-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[TMP8]] to i64
309+
; CHECK-NEXT: [[IDX1:%.*]] = getelementptr double, ptr [[P]], i64 [[ZEXT]]
310+
; CHECK-NEXT: [[IDX2:%.*]] = getelementptr double, ptr [[P]], i64 [[ZEXT]]
311+
; CHECK-NEXT: [[TMP9:%.*]] = load double, ptr [[IDX2]], align 8
312+
; CHECK-NEXT: br label [[ELSE]]
313+
; CHECK: else:
314+
; CHECK-NEXT: [[PHI:%.*]] = phi double [ [[TMP9]], [[THEN]] ], [ 0.000000e+00, [[BODY]] ]
315+
; CHECK-NEXT: store double [[PHI]], ptr [[P1]], align 8
316+
; CHECK-NEXT: [[NEXT]] = add i64 [[IV]], 1
317+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[NEXT]], 0
318+
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT]], label [[BODY]], !llvm.loop [[LOOP18:![0-9]+]]
319+
; CHECK: exit:
320+
; CHECK-NEXT: ret void
321+
;
322+
entry:
323+
br label %body
324+
325+
body:
326+
%iv = phi i64 [ %next, %else ], [ 0, %entry ]
327+
%0 = trunc i64 %iv to i32
328+
%c = icmp eq i32 %0, 0
329+
br i1 %c, label %then, label %else
330+
331+
then:
332+
%zext = zext nneg i32 %0 to i64
333+
%idx1 = getelementptr double, ptr %p, i64 %zext
334+
%idx2 = getelementptr double, ptr %p, i64 %zext
335+
%1 = load double, ptr %idx2, align 8
336+
br label %else
337+
338+
else:
339+
%phi = phi double [ %1, %then ], [ 0.000000e+00, %body ]
340+
store double %phi, ptr %p1, align 8
341+
%next = add i64 %iv, 1
342+
%cmp = icmp eq i64 %next, 0
343+
br i1 %cmp, label %exit, label %body
344+
345+
exit:
346+
ret void
347+
}
348+
271349
; Preserve poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked gather.
272350
define void @preserve_vector_exact_no_addr(ptr noalias nocapture readonly %input,
273351
ptr %output) local_unnamed_addr #0 {

llvm/test/Transforms/LoopVectorize/vplan-printing.ll

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,47 @@ exit:
806806
ret void
807807
}
808808

809+
define void @zext_nneg(ptr noalias %p, ptr noalias %p1) {
810+
; CHECK-LABEL: LV: Checking a loop in 'zext_nneg'
811+
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
812+
; CHECK-NEXT: Live-in vp<%0> = vector-trip-count
813+
; CHECK-NEXT: Live-in ir<0> = original trip-count
814+
; CHECK-EMPTY:
815+
; CHECK-NEXT: vector.ph:
816+
; CHECK-NEXT: Successor(s): vector loop
817+
; CHECK-EMPTY:
818+
; CHECK-NEXT: <x1> vector loop: {
819+
; CHECK-NEXT: vector.body:
820+
; CHECK-NEXT: EMIT vp<%1> = CANONICAL-INDUCTION ir<0>, vp<%8>
821+
; CHECK-NEXT: vp<%2> = DERIVED-IV ir<0> + vp<%1> * ir<1> (truncated to i32)
822+
; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
823+
; CHECK-NEXT: CLONE ir<%zext> = zext nneg vp<%3>
824+
; CHECK-NEXT: CLONE ir<%idx2> = getelementptr ir<%p>, ir<%zext>
825+
; CHECK-NEXT: WIDEN ir<%1> = load ir<%idx2>
826+
; CHECK-NEXT: REPLICATE store ir<%1>, ir<%p1>
827+
; CHECK-NEXT: EMIT vp<%8> = VF * UF + nuw vp<%1>
828+
; CHECK-NEXT: EMIT branch-on-count vp<%8>, vp<%0>
829+
; CHECK-NEXT: No successors
830+
; CHECK-NEXT: }
831+
;
832+
entry:
833+
br label %body
834+
835+
body:
836+
%iv = phi i64 [ %next, %body ], [ 0, %entry ]
837+
%0 = trunc i64 %iv to i32
838+
%zext = zext nneg i32 %0 to i64
839+
%idx2 = getelementptr double, ptr %p, i64 %zext
840+
%1 = load double, ptr %idx2, align 8
841+
store double %1, ptr %p1, align 8
842+
%next = add i64 %iv, 1
843+
%cmp = icmp eq i64 %next, 0
844+
br i1 %cmp, label %exit, label %body
845+
846+
exit:
847+
ret void
848+
}
849+
809850
!llvm.dbg.cu = !{!0}
810851
!llvm.module.flags = !{!3, !4}
811852

0 commit comments

Comments
 (0)