Skip to content

Commit cf284f6

Browse files
committed
[LSV] Change the default value of InsertElement to poison
This patch changes the InsertElement placeholder from undef to poison without changing the LSV's behavior. Regardless of whether `StoreTy` is a FixedVectorType or not, the poison value will be overwritten with a different value. Therefore, whether the InsertElement placeholder is poison or undef does not affect the result of the program. Reviewed By: nikic. Differential Revision: https://reviews.llvm.org/D111005
1 parent c274384 commit cf284f6

File tree

4 files changed

+9
-9
lines changed

4 files changed

+9
-9
lines changed

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1114,7 +1114,7 @@ bool Vectorizer::vectorizeStoreChain(
11141114
std::tie(First, Last) = getBoundaryInstrs(Chain);
11151115
Builder.SetInsertPoint(&*Last);
11161116

1117-
Value *Vec = UndefValue::get(VecTy);
1117+
Value *Vec = PoisonValue::get(VecTy);
11181118

11191119
if (VecStoreTy) {
11201120
unsigned VecWidth = VecStoreTy->getNumElements();

llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ define amdgpu_kernel void @merge_global_store_4_constants_i64(i64 addrspace(1)*
221221
; CHECK: [[LOAD:%[^ ]+]] = load <2 x i32>
222222
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i32> [[LOAD]], i32 0
223223
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i32> [[LOAD]], i32 1
224-
; CHECK: [[INSERT0:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[ELT0]], i32 0
224+
; CHECK: [[INSERT0:%[^ ]+]] = insertelement <2 x i32> poison, i32 [[ELT0]], i32 0
225225
; CHECK: [[INSERT1:%[^ ]+]] = insertelement <2 x i32> [[INSERT0]], i32 [[ELT1]], i32 1
226226
; CHECK: store <2 x i32> [[INSERT1]]
227227
define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
@@ -260,7 +260,7 @@ define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32_nonzero_base(
260260
; CHECK: [[LOAD:%[^ ]+]] = load <2 x i32>
261261
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i32> [[LOAD]], i32 0
262262
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i32> [[LOAD]], i32 1
263-
; CHECK: [[INSERT0:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[ELT1]], i32 0
263+
; CHECK: [[INSERT0:%[^ ]+]] = insertelement <2 x i32> poison, i32 [[ELT1]], i32 0
264264
; CHECK: [[INSERT1:%[^ ]+]] = insertelement <2 x i32> [[INSERT0]], i32 [[ELT0]], i32 1
265265
; CHECK: store <2 x i32> [[INSERT1]]
266266
define amdgpu_kernel void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {

llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ entry:
7474

7575
; CHECK-LABEL: @merge_store_ptr64_i64(
7676
; CHECK: [[ELT0:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
77-
; CHECK: insertelement <2 x i64> undef, i64 [[ELT0]], i32 0
77+
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0]], i32 0
7878
; CHECK: store <2 x i64>
7979
define amdgpu_kernel void @merge_store_ptr64_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, i64 %val1) #0 {
8080
entry:
@@ -135,7 +135,7 @@ entry:
135135

136136
; CHECK-LABEL: @merge_store_ptr32_i32(
137137
; CHECK: [[ELT0:%[^ ]+]] = ptrtoint i8 addrspace(3)* %ptr0 to i32
138-
; CHECK: insertelement <2 x i32> undef, i32 [[ELT0]], i32 0
138+
; CHECK: insertelement <2 x i32> poison, i32 [[ELT0]], i32 0
139139
; CHECK: store <2 x i32>
140140
define amdgpu_kernel void @merge_store_ptr32_i32(i32 addrspace(3)* nocapture %a, i8 addrspace(3)* %ptr0, i32 %val1) #0 {
141141
entry:
@@ -275,7 +275,7 @@ entry:
275275

276276
; CHECK-LABEL: @merge_store_ptr64_f64(
277277
; CHECK: [[ELT0_INT:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
278-
; CHECK: insertelement <2 x i64> undef, i64 [[ELT0_INT]], i32 0
278+
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0_INT]], i32 0
279279
; CHECK: [[ELT1_INT:%[^ ]+]] = bitcast double %val1 to i64
280280
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1
281281
; CHECK: store <2 x i64>
@@ -292,7 +292,7 @@ entry:
292292

293293
; CHECK-LABEL: @merge_store_f64_ptr64(
294294
; CHECK: [[ELT0_INT:%[^ ]+]] = bitcast double %val0 to i64
295-
; CHECK: insertelement <2 x i64> undef, i64 [[ELT0_INT]], i32 0
295+
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0_INT]], i32 0
296296
; CHECK: [[ELT1_INT:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64
297297
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1
298298
; CHECK: store <2 x i64>

llvm/test/Transforms/LoadStoreVectorizer/int_sideeffect.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ define void @test_sideeffect(float* %p) {
1717
; CHECK-NEXT: [[L34:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
1818
; CHECK-NEXT: call void @llvm.sideeffect()
1919
; CHECK-NEXT: call void @llvm.sideeffect()
20-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> undef, float [[L01]], i32 0
20+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> poison, float [[L01]], i32 0
2121
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[L12]], i32 1
2222
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[L23]], i32 2
2323
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[L34]], i32 3
@@ -55,7 +55,7 @@ define void @test_inaccessiblememonly(float* %p) {
5555
; CHECK-NEXT: [[L34:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
5656
; CHECK-NEXT: call void @foo() #[[ATTR1:[0-9]+]]
5757
; CHECK-NEXT: call void @foo() #[[ATTR1]]
58-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> undef, float [[L01]], i32 0
58+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> poison, float [[L01]], i32 0
5959
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[L12]], i32 1
6060
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[L23]], i32 2
6161
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[L34]], i32 3

0 commit comments

Comments
 (0)