Skip to content

Commit dd07d60

Browse files
committed
[SLP] Add test case showing a bug when dealing with padded types
We shouldn't vectorize stores of non-packed types (i.e. types that have padding between consecutive variables in a scalar layout, but are packed in a vector layout). The problem was detected as a miscompile in a downstream test case. This is a pre-commit of a test case for the fix in D94446.
1 parent 2f7ec77 commit dd07d60

File tree

1 file changed

+32
-2
lines changed

1 file changed

+32
-2
lines changed

llvm/test/Transforms/SLPVectorizer/X86/bad_types.ll

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ define void @test1(x86_mmx %a, x86_mmx %b, i64* %ptr) {
1515
; CHECK-NEXT: [[A_AND:%.*]] = and i64 [[A_CAST]], 42
1616
; CHECK-NEXT: [[B_AND:%.*]] = and i64 [[B_CAST]], 42
1717
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, i64* [[PTR:%.*]], i32 1
18-
; CHECK-NEXT: store i64 [[A_AND]], i64* [[PTR]]
19-
; CHECK-NEXT: store i64 [[B_AND]], i64* [[GEP]]
18+
; CHECK-NEXT: store i64 [[A_AND]], i64* [[PTR]], align 8
19+
; CHECK-NEXT: store i64 [[B_AND]], i64* [[GEP]], align 8
2020
; CHECK-NEXT: ret void
2121
;
2222
entry:
@@ -110,3 +110,33 @@ bb1: ; preds = %entry
110110
}
111111

112112
declare void @f(i64, i64)
113+
114+
define void @test4(i32 %a, i28* %ptr) {
115+
; Check that we do not vectorize types that are padded to bigger ones.
116+
; FIXME: This is not correct! See D94446.
117+
;
118+
; CHECK-LABEL: @test4(
119+
; CHECK-NEXT: entry:
120+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[A:%.*]] to i28
121+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i28, i28* [[PTR:%.*]], i32 1
122+
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i28, i28* [[PTR]], i32 2
123+
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i28, i28* [[PTR]], i32 3
124+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i28> poison, i28 [[TRUNC]], i32 0
125+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i28> [[TMP0]], i28 [[TRUNC]], i32 1
126+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i28> [[TMP1]], i28 [[TRUNC]], i32 2
127+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i28> [[TMP2]], i28 [[TRUNC]], i32 3
128+
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i28* [[PTR]] to <4 x i28>*
129+
; CHECK-NEXT: store <4 x i28> [[TMP3]], <4 x i28>* [[TMP4]], align 4
130+
; CHECK-NEXT: ret void
131+
;
132+
entry:
133+
%trunc = trunc i32 %a to i28
134+
%gep1 = getelementptr i28, i28* %ptr, i32 1
135+
%gep2 = getelementptr i28, i28* %ptr, i32 2
136+
%gep3 = getelementptr i28, i28* %ptr, i32 3
137+
store i28 %trunc, i28* %ptr
138+
store i28 %trunc, i28* %gep1
139+
store i28 %trunc, i28* %gep2
140+
store i28 %trunc, i28* %gep3
141+
ret void
142+
}

0 commit comments

Comments
 (0)