Skip to content

Commit 6261c53

Browse files
committed
[VPlan] Make sure OR VPInstructions are treated as disjoint ops.
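Make sure that VPInstructions with OR opcodes are properly registered as disjoint ops: VPBuilder::createOr now constructs the VPInstruction with DisjointFlagsTy(false), which sets its OpType to OperationType::DisjointOp. Fixes #87378.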
1 parent d8db13e commit 6261c53

File tree

3 files changed

+155
-4
lines changed

3 files changed

+155
-4
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,10 @@ class VPBuilder {
175175

176176
VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
177177
const Twine &Name = "") {
178-
return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL, Name);
178+
179+
return tryInsertInstruction(new VPInstruction(
180+
Instruction::BinaryOps::Or, {LHS, RHS},
181+
VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
179182
}
180183

181184
VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -913,16 +913,18 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
913913
WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
914914
};
915915

916+
struct DisjointFlagsTy {
917+
char IsDisjoint : 1;
918+
DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
919+
};
920+
916921
protected:
917922
struct GEPFlagsTy {
918923
char IsInBounds : 1;
919924
GEPFlagsTy(bool IsInBounds) : IsInBounds(IsInBounds) {}
920925
};
921926

922927
private:
923-
struct DisjointFlagsTy {
924-
char IsDisjoint : 1;
925-
};
926928
struct ExactFlagsTy {
927929
char IsExact : 1;
928930
};
@@ -1016,6 +1018,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
10161018
: VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
10171019
FMFs(FMFs) {}
10181020

1021+
template <typename IterT>
1022+
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1023+
DisjointFlagsTy DisjointFlags, DebugLoc DL = {})
1024+
: VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
1025+
DisjointFlags(DisjointFlags) {}
1026+
10191027
protected:
10201028
template <typename IterT>
10211029
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
@@ -1221,6 +1229,14 @@ class VPInstruction : public VPRecipeWithIRFlags {
12211229
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
12221230
Opcode(Opcode), Name(Name.str()) {}
12231231

1232+
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1233+
DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
1234+
const Twine &Name = "")
1235+
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
1236+
Opcode(Opcode), Name(Name.str()) {
1237+
assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1238+
}
1239+
12241240
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
12251241
FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
12261242

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -p loop-vectorize -mattr="+v" -S %s | FileCheck %s
3+
4+
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
5+
target triple = "riscv64-unknown-linux-gnu"
6+
7+
; Test case for https://github.com/llvm/llvm-project/issues/87378.
8+
define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 %a, i64 %b, i64 %c) {
9+
; CHECK-LABEL: define void @pr87378_vpinstruction_or_drop_poison_generating_flags(
10+
; CHECK-SAME: ptr [[ARG:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
11+
; CHECK-NEXT: entry:
12+
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
13+
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
14+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1001, [[TMP1]]
15+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
16+
; CHECK: vector.ph:
17+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
18+
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
19+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1001, [[TMP3]]
20+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1001, [[N_MOD_VF]]
21+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
22+
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
23+
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
24+
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 8 x i64> [[TMP6]], zeroinitializer
25+
; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 8 x i64> [[TMP7]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
26+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP8]]
27+
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
28+
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8
29+
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 1, [[TMP10]]
30+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP11]], i64 0
31+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
32+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[A]], i64 0
33+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
34+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[B]], i64 0
35+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
36+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[C]], i64 0
37+
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
38+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
39+
; CHECK: vector.body:
40+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
41+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
42+
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 0
43+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
44+
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
45+
; CHECK-NEXT: [[TMP13:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
46+
; CHECK-NEXT: [[TMP14:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
47+
; CHECK-NEXT: [[TMP15:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP14]], <vscale x 8 x i1> zeroinitializer
48+
; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 8 x i1> [[TMP13]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
49+
; CHECK-NEXT: [[TMP17:%.*]] = or <vscale x 8 x i1> [[TMP15]], [[TMP16]]
50+
; CHECK-NEXT: [[TMP18:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
51+
; CHECK-NEXT: [[TMP19:%.*]] = select <vscale x 8 x i1> [[TMP17]], <vscale x 8 x i1> [[TMP18]], <vscale x 8 x i1> zeroinitializer
52+
; CHECK-NEXT: [[TMP20:%.*]] = xor <vscale x 8 x i1> [[TMP14]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
53+
; CHECK-NEXT: [[TMP21:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP20]], <vscale x 8 x i1> zeroinitializer
54+
; CHECK-NEXT: [[TMP22:%.*]] = or <vscale x 8 x i1> [[TMP19]], [[TMP21]]
55+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP19]], <vscale x 8 x i64> [[BROADCAST_SPLAT6]], <vscale x 8 x i64> shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 poison, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
56+
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <vscale x 8 x i64> [[PREDPHI]], i32 0
57+
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[TMP23]]
58+
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i16, ptr [[TMP24]], i32 0
59+
; CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> zeroinitializer, ptr [[TMP25]], i32 2, <vscale x 8 x i1> [[TMP22]])
60+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
61+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
62+
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
63+
; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
64+
; CHECK: middle.block:
65+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1001, [[N_VEC]]
66+
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
67+
; CHECK: scalar.ph:
68+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
69+
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
70+
; CHECK: loop.header:
71+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
72+
; CHECK-NEXT: [[C_1:%.*]] = icmp ule i64 [[IV]], [[A]]
73+
; CHECK-NEXT: br i1 [[C_1]], label [[THEN_1:%.*]], label [[ELSE_1:%.*]]
74+
; CHECK: then.1:
75+
; CHECK-NEXT: [[C_2:%.*]] = icmp ule i64 [[IV]], [[B]]
76+
; CHECK-NEXT: br i1 [[C_2]], label [[ELSE_1]], label [[MERGE:%.*]]
77+
; CHECK: else.1:
78+
; CHECK-NEXT: [[C_3:%.*]] = icmp ule i64 [[IV]], [[C]]
79+
; CHECK-NEXT: br i1 [[C_3]], label [[THEN_2:%.*]], label [[LOOP_LATCH]]
80+
; CHECK: then.2:
81+
; CHECK-NEXT: br label [[MERGE]]
82+
; CHECK: merge:
83+
; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ poison, [[THEN_1]] ], [ [[IV]], [[THEN_2]] ]
84+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[IDX]]
85+
; CHECK-NEXT: store i16 0, ptr [[GETELEMENTPTR]], align 2
86+
; CHECK-NEXT: br label [[LOOP_LATCH]]
87+
; CHECK: loop.latch:
88+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
89+
; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i64 [[IV]], 1000
90+
; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
91+
; CHECK: exit:
92+
; CHECK-NEXT: ret void
93+
;
94+
entry:
95+
br label %loop.header
96+
97+
loop.header:
98+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
99+
%c.1 = icmp ule i64 %iv, %a
100+
br i1 %c.1, label %then.1, label %else.1
101+
102+
then.1:
103+
%c.2 = icmp ule i64 %iv, %b
104+
br i1 %c.2, label %else.1, label %merge
105+
106+
else.1:
107+
%c.3 = icmp ule i64 %iv, %c
108+
br i1 %c.3, label %then.2, label %loop.latch
109+
110+
then.2:
111+
br label %merge
112+
113+
merge:
114+
%idx = phi i64 [ poison, %then.1 ], [ %iv, %then.2 ]
115+
%getelementptr = getelementptr i16, ptr %arg, i64 %idx
116+
store i16 0, ptr %getelementptr, align 2
117+
br label %loop.latch
118+
119+
loop.latch:
120+
%iv.next = add i64 %iv, 1
121+
%icmp = icmp eq i64 %iv, 1000
122+
br i1 %icmp, label %exit, label %loop.header
123+
124+
exit:
125+
ret void
126+
}
127+
;.
128+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
129+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
130+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
131+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
132+
;.

0 commit comments

Comments
 (0)