-
Notifications
You must be signed in to change notification settings - Fork 14.3k
StructurizeCFG: Use poison instead of undef #130459
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
StructurizeCFG: Use poison instead of undef #130459
Conversation
There are a surprising number of codegen changes from this.
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: There are a surprising number of codegen changes from this. Patch is 247.65 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130459.diff 26 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index b1f742b838f2a..28683afe0acd7 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -858,10 +858,10 @@ void StructurizeCFG::setPhiValues() {
PhiMap &Map = DeletedPhis[To];
SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
for (const auto &[Phi, Incoming] : Map) {
- Value *Undef = UndefValue::get(Phi->getType());
+ Value *Poison = PoisonValue::get(Phi->getType());
Updater.Initialize(Phi->getType(), "");
- Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
- Updater.AddAvailableValue(To, Undef);
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
+ Updater.AddAvailableValue(To, Poison);
// Use leader phi's incoming if there is.
auto LeaderIt = PhiClasses.findLeader(Phi);
@@ -890,7 +890,7 @@ void StructurizeCFG::setPhiValues() {
if (Updater.HasValueForBlock(UB))
continue;
- Updater.AddAvailableValue(UB, Undef);
+ Updater.AddAvailableValue(UB, Poison);
}
for (BasicBlock *FI : From)
@@ -1181,9 +1181,9 @@ void StructurizeCFG::rebuildSSA() {
continue;
if (!Initialized) {
- Value *Undef = UndefValue::get(I.getType());
+ Value *Poison = PoisonValue::get(I.getType());
Updater.Initialize(I.getType(), "");
- Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+ Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
Updater.AddAvailableValue(BB, &I);
Initialized = true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index cc768a2cdf61f..5fa991cd27785 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -199,33 +199,31 @@ define amdgpu_kernel void @break_loop(i32 %arg) {
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_subrev_u32_e32 v0, s0, v0
; CHECK-NEXT: s_mov_b64 s[0:1], 0
-; CHECK-NEXT: s_branch .LBB5_3
-; CHECK-NEXT: .LBB5_1: ; %bb4
-; CHECK-NEXT: ; in Loop: Header=BB5_3 Depth=1
-; CHECK-NEXT: global_load_dword v2, v[0:1], off glc
-; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
-; CHECK-NEXT: v_cmp_ge_i32_e32 vcc, v0, v2
-; CHECK-NEXT: s_and_b64 s[4:5], exec, vcc
-; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
-; CHECK-NEXT: .LBB5_2: ; %Flow
-; CHECK-NEXT: ; in Loop: Header=BB5_3 Depth=1
+; CHECK-NEXT: s_branch .LBB5_2
+; CHECK-NEXT: .LBB5_1: ; %Flow
+; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT: s_and_b64 s[4:5], exec, s[2:3]
; CHECK-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[0:1]
-; CHECK-NEXT: s_cbranch_execz .LBB5_5
-; CHECK-NEXT: .LBB5_3: ; %bb1
+; CHECK-NEXT: s_cbranch_execz .LBB5_4
+; CHECK-NEXT: .LBB5_2: ; %bb1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; CHECK-NEXT: s_and_b64 s[4:5], exec, -1
; CHECK-NEXT: v_cmp_le_i32_e32 vcc, 0, v1
; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
-; CHECK-NEXT: s_cbranch_vccz .LBB5_1
-; CHECK-NEXT: ; %bb.4: ; in Loop: Header=BB5_3 Depth=1
-; CHECK-NEXT: ; implicit-def: $vgpr1
-; CHECK-NEXT: s_branch .LBB5_2
-; CHECK-NEXT: .LBB5_5: ; %bb9
+; CHECK-NEXT: s_cbranch_vccnz .LBB5_1
+; CHECK-NEXT: ; %bb.3: ; %bb4
+; CHECK-NEXT: ; in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT: global_load_dword v2, v[0:1], off glc
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
+; CHECK-NEXT: v_cmp_ge_i32_e32 vcc, v0, v2
+; CHECK-NEXT: s_and_b64 s[4:5], exec, vcc
+; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
+; CHECK-NEXT: s_branch .LBB5_1
+; CHECK-NEXT: .LBB5_4: ; %bb9
; CHECK-NEXT: s_endpgm
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index e70e34fa0ba5d..3116b5d59a097 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -646,13 +646,13 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: v_add_f32_e32 v9, v9, v15
; GFX908-NEXT: v_add_f32_e32 v10, v10, v12
; GFX908-NEXT: v_add_f32_e32 v11, v11, v13
-; GFX908-NEXT: s_mov_b64 s[20:21], -1
; GFX908-NEXT: s_branch .LBB3_4
; GFX908-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
; GFX908-NEXT: s_mov_b64 s[20:21], s[16:17]
; GFX908-NEXT: s_andn2_b64 vcc, exec, s[20:21]
; GFX908-NEXT: s_cbranch_vccz .LBB3_4
; GFX908-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
+; GFX908-NEXT: s_mov_b64 s[20:21], -1
; GFX908-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX908-NEXT: ; implicit-def: $sgpr18_sgpr19
; GFX908-NEXT: .LBB3_9: ; %loop.exit.guard
@@ -798,13 +798,13 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: v_pk_add_f32 v[8:9], v[8:9], v[26:27]
; GFX90A-NEXT: v_pk_add_f32 v[10:11], v[10:11], v[16:17]
; GFX90A-NEXT: v_pk_add_f32 v[12:13], v[12:13], v[14:15]
-; GFX90A-NEXT: s_mov_b64 s[20:21], -1
; GFX90A-NEXT: s_branch .LBB3_4
; GFX90A-NEXT: .LBB3_7: ; in Loop: Header=BB3_5 Depth=2
; GFX90A-NEXT: s_mov_b64 s[20:21], s[16:17]
; GFX90A-NEXT: s_andn2_b64 vcc, exec, s[20:21]
; GFX90A-NEXT: s_cbranch_vccz .LBB3_4
; GFX90A-NEXT: ; %bb.8: ; in Loop: Header=BB3_2 Depth=1
+; GFX90A-NEXT: s_mov_b64 s[20:21], -1
; GFX90A-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX90A-NEXT: ; implicit-def: $sgpr18_sgpr19
; GFX90A-NEXT: .LBB3_9: ; %loop.exit.guard
diff --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
index 36fa7b97b3c77..a6af63b816573 100644
--- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_mov_b32 s12, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_cmp_lg_u32 s52, 0
-; CHECK-NEXT: s_cbranch_scc1 .LBB0_8
+; CHECK-NEXT: s_cbranch_scc1 .LBB0_9
; CHECK-NEXT: ; %bb.1: ; %if.end13.i.i
; CHECK-NEXT: s_cmp_eq_u32 s54, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB0_4
@@ -26,36 +26,36 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_cbranch_vccz .LBB0_5
; CHECK-NEXT: ; %bb.3:
-; CHECK-NEXT: s_mov_b32 s48, 0
-; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
-; CHECK-NEXT: s_cbranch_vccz .LBB0_6
-; CHECK-NEXT: s_branch .LBB0_7
+; CHECK-NEXT: s_mov_b32 s18, 0
+; CHECK-NEXT: s_branch .LBB0_6
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: s_mov_b32 s14, s12
; CHECK-NEXT: s_mov_b32 s15, s12
; CHECK-NEXT: s_mov_b32 s13, s12
; CHECK-NEXT: s_mov_b64 s[50:51], s[14:15]
; CHECK-NEXT: s_mov_b64 s[48:49], s[12:13]
-; CHECK-NEXT: s_branch .LBB0_7
+; CHECK-NEXT: s_branch .LBB0_8
; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i
; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s53, 0
-; CHECK-NEXT: s_mov_b32 s48, 1.0
+; CHECK-NEXT: s_mov_b32 s18, 1.0
; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000
+; CHECK-NEXT: .LBB0_6: ; %Flow
+; CHECK-NEXT: s_mov_b32 s48, 1.0
+; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
; CHECK-NEXT: s_mov_b32 s49, s48
; CHECK-NEXT: s_mov_b32 s50, s48
; CHECK-NEXT: s_mov_b32 s51, s48
-; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12
-; CHECK-NEXT: s_cbranch_vccnz .LBB0_7
-; CHECK-NEXT: .LBB0_6: ; %if.end273.i.i
+; CHECK-NEXT: s_cbranch_vccnz .LBB0_8
+; CHECK-NEXT: ; %bb.7: ; %if.end273.i.i
; CHECK-NEXT: s_add_u32 s12, s8, 40
; CHECK-NEXT: s_addc_u32 s13, s9, 0
-; CHECK-NEXT: s_getpc_b64 s[18:19]
-; CHECK-NEXT: s_add_u32 s18, s18, _Z3dotDv3_fS_@gotpcrel32@lo+4
-; CHECK-NEXT: s_addc_u32 s19, s19, _Z3dotDv3_fS_@gotpcrel32@hi+12
+; CHECK-NEXT: s_getpc_b64 s[20:21]
+; CHECK-NEXT: s_add_u32 s20, s20, _Z3dotDv3_fS_@gotpcrel32@lo+4
+; CHECK-NEXT: s_addc_u32 s21, s21, _Z3dotDv3_fS_@gotpcrel32@hi+12
; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2
-; CHECK-NEXT: s_load_dwordx2 s[18:19], s[18:19], 0x0
+; CHECK-NEXT: s_load_dwordx2 s[20:21], s[20:21], 0x0
; CHECK-NEXT: v_lshlrev_b32_e32 v3, 10, v1
-; CHECK-NEXT: v_add_f32_e64 v1, s17, s48
+; CHECK-NEXT: v_add_f32_e64 v1, s17, s18
; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9]
; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13]
; CHECK-NEXT: s_mov_b32 s12, s14
@@ -67,18 +67,18 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext
; CHECK-NEXT: s_mov_b32 s14, s16
; CHECK-NEXT: s_mov_b32 s48, 0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
+; CHECK-NEXT: s_swappc_b64 s[30:31], s[20:21]
; CHECK-NEXT: s_mov_b64 s[8:9], s[34:35]
; CHECK-NEXT: s_mov_b32 s49, s48
; CHECK-NEXT: s_mov_b32 s50, s48
; CHECK-NEXT: s_mov_b32 s51, s48
-; CHECK-NEXT: .LBB0_7: ; %if.end294.i.i
+; CHECK-NEXT: .LBB0_8: ; %if.end294.i.i
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; CHECK-NEXT: .LBB0_8: ; %kernel_direct_lighting.exit
+; CHECK-NEXT: .LBB0_9: ; %kernel_direct_lighting.exit
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x20
; CHECK-NEXT: v_mov_b32_e32 v0, s48
; CHECK-NEXT: v_mov_b32_e32 v4, 0
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index e43a021802644..266216c4d8b50 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -13,16 +13,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr17, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: renamable $vgpr31 = COPY $vgpr0, implicit $exec
- ; GFX90A-NEXT: renamable $sgpr33 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s32) from %ir.arg4.kernarg.offset.align.down, align 8, addrspace 4)
+ ; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s32) from %ir.arg4.kernarg.offset.align.down, align 8, addrspace 4)
; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg6.kernarg.offset.align.down, align 8, addrspace 4)
- ; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg6.kernarg.offset.align.down + 16, align 8, addrspace 4)
+ ; GFX90A-NEXT: renamable $sgpr33 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg6.kernarg.offset.align.down + 16, align 8, addrspace 4)
; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
- ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
- ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr33, 0, implicit-def $scc
+ ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
+ ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr17, 0, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_CSELECT_B64 -1, 0, implicit killed $scc
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_XOR_B64 renamable $sgpr12_sgpr13, -1, implicit-def dead $scc
- ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr33, 8, implicit-def $scc
+ ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr17, 8, implicit-def $scc
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_CSELECT_B64 -1, 0, implicit killed $scc
; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_XOR_B64 killed renamable $sgpr18_sgpr19, -1, implicit-def dead $scc
; GFX90A-NEXT: renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec
@@ -33,7 +33,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.1.bb103:
; GFX90A-NEXT: successors: %bb.58(0x40000000), %bb.2(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr30_sgpr31, implicit-def dead $scc
@@ -41,10 +41,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.2:
; GFX90A-NEXT: successors: %bb.3(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr44, $sgpr45, $sgpr20_sgpr21_sgpr22, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr46, $sgpr47, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $sgpr23 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
@@ -54,15 +52,17 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.3.Flow17:
; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.57(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr23, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr30 = V_AND_B32_e32 1023, $vgpr31, implicit $exec
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr34_sgpr35, implicit-def dead $scc
+ ; GFX90A-NEXT: renamable $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr17 = V_MOV_B32_e32 0, implicit $exec
; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.57, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.4.bb15:
; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
+ ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr2_vgpr3, implicit $exec
; GFX90A-NEXT: renamable $vgpr4 = COPY renamable $sgpr25, implicit $exec
@@ -79,7 +79,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
- ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0
+ ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0
; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0
@@ -107,24 +107,23 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
- ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.6.Flow20:
; GFX90A-NEXT: successors: %bb.7(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_...
[truncated]
|
You can test this locally with the following command: git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 0715c6cc6d11814b8a31bc0368ca080b72810177 c2f7c83683e432cb7c1850de36e25614a3efddbb llvm/lib/Transforms/Scalar/StructurizeCFG.cpp llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll llvm/test/CodeGen/AMDGPU/itofp.i128.bf.ll llvm/test/CodeGen/AMDGPU/itofp.i128.ll llvm/test/CodeGen/AMDGPU/loop_break.ll llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll llvm/test/CodeGen/AMDGPU/multilevel-break.ll llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll llvm/test/CodeGen/AMDGPU/scheduler-rp-calc-one-successor-two-predecessors-bug.ll llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll llvm/test/Transforms/StructurizeCFG/interleaved-loop-order.ll llvm/test/Transforms/StructurizeCFG/loop-break-phi.ll llvm/test/Transforms/StructurizeCFG/loop-continue-phi.ll llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll llvm/test/Transforms/StructurizeCFG/structurizer-keep-perf-md.ll llvm/test/Transforms/StructurizeCFG/workarounds/needs-fix-reducible.ll llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll llvm/test/Transforms/StructurizeCFG/workarounds/needs-unified-loop-exits.ll The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. In tests, avoid using `undef`. For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
There are a surprising number of codegen changes from this.