Skip to content

Commit 6609eb3

Browse files
committed
AMDGPU: Replace insertelement undef with poison in cases with manual updates
I had to manually intervene in a few tests. fcanonicalize.f16.ll is directly sensitive to undef vs. poison.
1 parent ddaf38a commit 6609eb3

File tree

3 files changed

+22
-22
lines changed

3 files changed

+22
-22
lines changed

llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -238,7 +238,7 @@ define <2 x half> @v_test_canonicalize_build_vector_v2f16(half %lo, half %hi) #1
238238
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
239239
; GFX11-FAKE16-NEXT: v_pk_max_f16 v0, v0, v0
240240
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
241-
%ins0 = insertelement <2 x half> undef, half %lo, i32 0
241+
%ins0 = insertelement <2 x half> poison, half %lo, i32 0
242242
%ins1 = insertelement <2 x half> %ins0, half %hi, i32 1
243243
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %ins1)
244244
ret <2 x half> %canonicalized
@@ -2581,7 +2581,7 @@ define <2 x half> @v_test_canonicalize_reg_undef_v2f16(half %val) #1 {
25812581
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
25822582
; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, 0
25832583
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
2584-
%vec = insertelement <2 x half> undef, half %val, i32 0
2584+
%vec = insertelement <2 x half> poison, half %val, i32 0
25852585
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
25862586
ret <2 x half> %canonicalized
25872587
}
@@ -2622,7 +2622,7 @@ define <2 x half> @v_test_canonicalize_undef_reg_v2f16(half %val) #1 {
26222622
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
26232623
; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
26242624
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
2625-
%vec = insertelement <2 x half> undef, half %val, i32 1
2625+
%vec = insertelement <2 x half> poison, half %val, i32 1
26262626
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec)
26272627
ret <2 x half> %canonicalized
26282628
}
@@ -2785,7 +2785,7 @@ define <2 x half> @v_test_canonicalize_reg_k_v2f16(half %val) #1 {
27852785
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
27862786
; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, 2.0
27872787
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
2788-
%vec0 = insertelement <2 x half> undef, half %val, i32 0
2788+
%vec0 = insertelement <2 x half> poison, half %val, i32 0
27892789
%vec1 = insertelement <2 x half> %vec0, half 2.0, i32 1
27902790
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec1)
27912791
ret <2 x half> %canonicalized
@@ -2829,7 +2829,7 @@ define <2 x half> @v_test_canonicalize_k_reg_v2f16(half %val) #1 {
28292829
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
28302830
; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, 2.0, v0
28312831
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
2832-
%vec0 = insertelement <2 x half> undef, half 2.0, i32 0
2832+
%vec0 = insertelement <2 x half> poison, half 2.0, i32 0
28332833
%vec1 = insertelement <2 x half> %vec0, half %val, i32 1
28342834
%canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %vec1)
28352835
ret <2 x half> %canonicalized
@@ -2925,7 +2925,7 @@ define <4 x half> @v_test_canonicalize_reg_undef_undef_undef_v4f16(half %val) #1
29252925
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
29262926
; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, 0
29272927
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
2928-
%vec = insertelement <4 x half> undef, half %val, i32 0
2928+
%vec = insertelement <4 x half> poison, half %val, i32 0
29292929
%canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec)
29302930
ret <4 x half> %canonicalized
29312931
}
@@ -2977,7 +2977,7 @@ define <4 x half> @v_test_canonicalize_reg_reg_undef_undef_v4f16(half %val0, hal
29772977
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
29782978
; GFX11-FAKE16-NEXT: v_pk_max_f16 v0, v0, v0
29792979
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
2980-
%vec0 = insertelement <4 x half> undef, half %val0, i32 0
2980+
%vec0 = insertelement <4 x half> poison, half %val0, i32 0
29812981
%vec1 = insertelement <4 x half> %vec0, half %val1, i32 1
29822982
%canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec1)
29832983
ret <4 x half> %canonicalized
@@ -3035,7 +3035,7 @@ define <4 x half> @v_test_canonicalize_reg_undef_reg_reg_v4f16(half %val0, half
30353035
; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, 0
30363036
; GFX11-FAKE16-NEXT: v_pk_max_f16 v1, v1, v1
30373037
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
3038-
%vec0 = insertelement <4 x half> undef, half %val0, i32 0
3038+
%vec0 = insertelement <4 x half> poison, half %val0, i32 0
30393039
%vec1 = insertelement <4 x half> %vec0, half %val1, i32 2
30403040
%vec2 = insertelement <4 x half> %vec1, half %val2, i32 3
30413041
%canonicalized = call <4 x half> @llvm.canonicalize.v4f16(<4 x half> %vec2)

llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -134,7 +134,7 @@ define amdgpu_vs void @promote_load_from_store_aggr_varoff(<4 x i32> %input) {
134134
; CHECK-NEXT: [[FOO3_UNPACK2:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
135135
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[FOO3_UNPACK2]], i32 2
136136
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[TMP1]], i32 [[FOO3_UNPACK2]]
137-
; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x i32> %input, i32 [[TMP2]], i64 3
137+
; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x i32> [[INPUT:%.*]], i32 [[TMP2]], i64 3
138138
; CHECK-NEXT: store <4 x i32> [[FOO12]], ptr addrspace(1) @pv1, align 16
139139
; CHECK-NEXT: ret void
140140
;
@@ -344,7 +344,7 @@ define amdgpu_ps void @promote_double_aggr() #0 {
344344
; CHECK-NEXT: [[FOO10:%.*]] = fadd double [[FOO5_FCA_1_EXTRACT]], [[FOO5_FCA_1_EXTRACT]]
345345
; CHECK-NEXT: [[FOO16:%.*]] = fadd double [[FOO10]], [[FOO5_FCA_1_EXTRACT]]
346346
; CHECK-NEXT: [[FOO17:%.*]] = fptrunc double [[FOO16]] to float
347-
; CHECK-NEXT: [[FOO18:%.*]] = insertelement <4 x float> undef, float [[FOO17]], i32 0
347+
; CHECK-NEXT: [[FOO18:%.*]] = insertelement <4 x float> poison, float [[FOO17]], i32 0
348348
; CHECK-NEXT: [[FOO19:%.*]] = insertelement <4 x float> [[FOO18]], float [[FOO17]], i32 1
349349
; CHECK-NEXT: [[FOO20:%.*]] = insertelement <4 x float> [[FOO19]], float [[FOO17]], i32 2
350350
; CHECK-NEXT: [[FOO21:%.*]] = insertelement <4 x float> [[FOO20]], float [[FOO17]], i32 3
@@ -370,7 +370,7 @@ define amdgpu_ps void @promote_double_aggr() #0 {
370370
%foo15 = load double, ptr addrspace(5) %foo14
371371
%foo16 = fadd double %foo13, %foo15
372372
%foo17 = fptrunc double %foo16 to float
373-
%foo18 = insertelement <4 x float> undef, float %foo17, i32 0
373+
%foo18 = insertelement <4 x float> poison, float %foo17, i32 0
374374
%foo19 = insertelement <4 x float> %foo18, float %foo17, i32 1
375375
%foo20 = insertelement <4 x float> %foo19, float %foo17, i32 2
376376
%foo21 = insertelement <4 x float> %foo20, float %foo17, i32 3

llvm/test/CodeGen/AMDGPU/promote-alloca-loadstores.ll

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,15 @@ define amdgpu_kernel void @test_overwrite(i64 %val, i1 %cond) {
99
; CHECK-NEXT: entry:
1010
; CHECK-NEXT: br i1 [[COND]], label [[LOOP:%.*]], label [[END:%.*]]
1111
; CHECK: loop:
12-
; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP2:%.*]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY:%.*]] ]
13-
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0
14-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i64> [[PROMOTEALLOCA]], i64 68, i32 0
12+
; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP2:%.*]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY:%.*]] ]
13+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0
14+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i64> [[PROMOTEALLOCA1]], i64 68, i32 0
1515
; CHECK-NEXT: [[TMP2]] = insertelement <3 x i64> [[TMP1]], i64 32, i32 0
1616
; CHECK-NEXT: [[LOOP_CC:%.*]] = icmp ne i64 [[TMP0]], 68
1717
; CHECK-NEXT: br i1 [[LOOP_CC]], label [[LOOP]], label [[END]]
1818
; CHECK: end:
19-
; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP2]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY]] ]
20-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0
19+
; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP2]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY]] ]
20+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0
2121
; CHECK-NEXT: ret void
2222
;
2323
entry:
@@ -64,15 +64,15 @@ define amdgpu_kernel void @test_no_overwrite(i64 %val, i1 %cond) {
6464
; CHECK-NEXT: entry:
6565
; CHECK-NEXT: br i1 [[COND]], label [[LOOP:%.*]], label [[END:%.*]]
6666
; CHECK: loop:
67-
; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP1:%.*]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY:%.*]] ]
68-
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0
69-
; CHECK-NEXT: [[TMP1]] = insertelement <3 x i64> [[PROMOTEALLOCA]], i64 32, i32 1
67+
; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP1:%.*]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY:%.*]] ]
68+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0
69+
; CHECK-NEXT: [[TMP1]] = insertelement <3 x i64> [[PROMOTEALLOCA1]], i64 32, i32 1
7070
; CHECK-NEXT: [[LOOP_CC:%.*]] = icmp ne i64 [[TMP0]], 32
7171
; CHECK-NEXT: br i1 [[LOOP_CC]], label [[LOOP]], label [[END]]
7272
; CHECK: end:
73-
; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP1]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY]] ]
74-
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0
75-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 1
73+
; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP1]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY]] ]
74+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0
75+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 1
7676
; CHECK-NEXT: ret void
7777
;
7878
entry:

0 commit comments

Comments
 (0)