Skip to content

Commit e094c0f

Browse files
authored
[SandboxVec][Legality] Don't vectorize when instructions repeat (#124479)
This patch adds a legality check that detects repeated instructions in a bundle and won't vectorize if such a pattern is found.
1 parent 62f6d63 commit e094c0f

File tree

4 files changed

+88
-0
lines changed

4 files changed

+88
-0
lines changed

llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ enum class ResultReason {
9292
DiffMathFlags,
9393
DiffWrapFlags,
9494
DiffBBs,
95+
RepeatedInstrs,
9596
NotConsecutive,
9697
CantSchedule,
9798
Unimplemented,
@@ -130,6 +131,8 @@ struct ToStr {
130131
return "DiffWrapFlags";
131132
case ResultReason::DiffBBs:
132133
return "DiffBBs";
134+
case ResultReason::RepeatedInstrs:
135+
return "RepeatedInstrs";
133136
case ResultReason::NotConsecutive:
134137
return "NotConsecutive";
135138
case ResultReason::CantSchedule:

llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,10 @@ const LegalityResult &LegalityAnalysis::canVectorize(ArrayRef<Value *> Bndl,
219219
if (any_of(drop_begin(Bndl),
220220
[BB](auto *V) { return cast<Instruction>(V)->getParent() != BB; }))
221221
return createLegalityResult<Pack>(ResultReason::DiffBBs);
222+
// Pack if instructions repeat, i.e., require some sort of broadcast.
223+
SmallPtrSet<Value *, 8> Unique(Bndl.begin(), Bndl.end());
224+
if (Unique.size() != Bndl.size())
225+
return createLegalityResult<Pack>(ResultReason::RepeatedInstrs);
222226

223227
auto CollectDescrs = getHowToCollectValues(Bndl);
224228
if (CollectDescrs.hasVectorInputs()) {
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="bottom-up-vec<>" %s -S | FileCheck %s
3+
4+
define i32 @repeated_splat(ptr %ptr, i32 %v) #0 {
5+
; CHECK-LABEL: define i32 @repeated_splat(
6+
; CHECK-SAME: ptr [[PTR:%.*]], i32 [[V:%.*]]) {
7+
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 0
8+
; CHECK-NEXT: [[VECL:%.*]] = load <2 x i32>, ptr [[GEP0]], align 4
9+
; CHECK-NEXT: [[SPLAT:%.*]] = add i32 [[V]], 0
10+
; CHECK-NEXT: [[PACK:%.*]] = insertelement <2 x i32> poison, i32 [[SPLAT]], i32 0
11+
; CHECK-NEXT: [[PACK1:%.*]] = insertelement <2 x i32> [[PACK]], i32 [[SPLAT]], i32 1
12+
; CHECK-NEXT: [[VEC:%.*]] = mul <2 x i32> [[VECL]], [[PACK1]]
13+
; CHECK-NEXT: store <2 x i32> [[VEC]], ptr [[GEP0]], align 4
14+
; CHECK-NEXT: ret i32 0
15+
;
16+
%gep0 = getelementptr inbounds i32, ptr %ptr, i64 0
17+
%gep1 = getelementptr inbounds i32, ptr %ptr, i64 1
18+
%ld0 = load i32, ptr %gep0, align 4
19+
%ld1 = load i32, ptr %gep1, align 4
20+
%splat = add i32 %v, 0
21+
%add0 = mul i32 %ld0, %splat
22+
%add1 = mul i32 %ld1, %splat
23+
store i32 %add0, ptr %gep0, align 4
24+
store i32 %add1, ptr %gep1, align 4
25+
ret i32 0
26+
}
27+
28+
define i32 @repeated_partial(ptr %ptr, i32 %v) #0 {
29+
; CHECK-LABEL: define i32 @repeated_partial(
30+
; CHECK-SAME: ptr [[PTR:%.*]], i32 [[V:%.*]]) {
31+
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 0
32+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 1
33+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 3
34+
; CHECK-NEXT: [[LD0:%.*]] = load i32, ptr [[GEP0]], align 4
35+
; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[GEP1]], align 4
36+
; CHECK-NEXT: [[LD3:%.*]] = load i32, ptr [[GEP3]], align 4
37+
; CHECK-NEXT: [[PACK:%.*]] = insertelement <4 x i32> poison, i32 [[LD0]], i32 0
38+
; CHECK-NEXT: [[PACK1:%.*]] = insertelement <4 x i32> [[PACK]], i32 [[LD1]], i32 1
39+
; CHECK-NEXT: [[PACK2:%.*]] = insertelement <4 x i32> [[PACK1]], i32 [[LD1]], i32 2
40+
; CHECK-NEXT: [[PACK3:%.*]] = insertelement <4 x i32> [[PACK2]], i32 [[LD3]], i32 3
41+
; CHECK-NEXT: [[VECL:%.*]] = load <4 x i32>, ptr [[GEP0]], align 4
42+
; CHECK-NEXT: [[SPLAT:%.*]] = add i32 [[V]], 0
43+
; CHECK-NEXT: [[VEC:%.*]] = mul <4 x i32> [[VECL]], [[PACK3]]
44+
; CHECK-NEXT: store <4 x i32> [[VEC]], ptr [[GEP0]], align 4
45+
; CHECK-NEXT: ret i32 0
46+
;
47+
%gep0 = getelementptr inbounds i32, ptr %ptr, i64 0
48+
%gep1 = getelementptr inbounds i32, ptr %ptr, i64 1
49+
%gep2 = getelementptr inbounds i32, ptr %ptr, i64 2
50+
%gep3 = getelementptr inbounds i32, ptr %ptr, i64 3
51+
%ld0 = load i32, ptr %gep0, align 4
52+
%ld1 = load i32, ptr %gep1, align 4
53+
%ld2 = load i32, ptr %gep2, align 4
54+
%ld3 = load i32, ptr %gep3, align 4
55+
%splat = add i32 %v, 0
56+
%add0 = mul i32 %ld0, %ld0
57+
%add1 = mul i32 %ld1, %ld1
58+
%add2 = mul i32 %ld2, %ld1
59+
%add3 = mul i32 %ld3, %ld3
60+
store i32 %add0, ptr %gep0, align 4
61+
store i32 %add1, ptr %gep1, align 4
62+
store i32 %add2, ptr %gep2, align 4
63+
store i32 %add3, ptr %gep3, align 4
64+
ret i32 0
65+
}

llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,22 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float
225225
Legality.canVectorize({Ld0, Ld1}, /*SkipScheduling=*/true);
226226
EXPECT_TRUE(isa<sandboxir::Widen>(Result));
227227
}
228+
{
229+
// Check Repeated instructions (splat)
230+
const auto &Result =
231+
Legality.canVectorize({Ld0, Ld0}, /*SkipScheduling=*/true);
232+
EXPECT_TRUE(isa<sandboxir::Pack>(Result));
233+
EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(),
234+
sandboxir::ResultReason::RepeatedInstrs);
235+
}
236+
{
237+
// Check Repeated instructions (not splat)
238+
const auto &Result =
239+
Legality.canVectorize({Ld0, Ld1, Ld0}, /*SkipScheduling=*/true);
240+
EXPECT_TRUE(isa<sandboxir::Pack>(Result));
241+
EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(),
242+
sandboxir::ResultReason::RepeatedInstrs);
243+
}
228244
}
229245

230246
TEST_F(LegalityTest, LegalitySchedule) {

0 commit comments

Comments
 (0)