@@ -78,4 +78,73 @@ else: ; preds = %else.if.cond
78
78
unreachable
79
79
}
80
80
81
; Test function: an amdgpu_ps function that takes a single float argument and
; returns { <4 x float> } undef from its %end block.
; Both kill blocks call @llvm.amdgcn.kill with a constant false condition.
; The expected-output lines that follow list, block by block, the machine
; instructions the test looks for. In particular:
;   - bb.5.kill0 is expected to contain `$exec = S_MOV_B64 0`;
;   - bb.6.end is expected to branch with S_CBRANCH_EXECZ to bb.7, which is
;     expected to contain EXP_DONE followed by S_ENDPGM 0;
;   - bb.8 is the last block listed and has no expected instructions.
+ define amdgpu_ps { <4 x float > } @test_return_to_epilog_with_optimized_kill (float %val ) #0 {
82
+ ; GCN-LABEL: name: test_return_to_epilog_with_optimized_kill
83
+ ; GCN: bb.0.entry:
84
+ ; GCN: successors: %bb.1(0x40000000), %bb.4(0x40000000)
85
+ ; GCN: liveins: $vgpr0
86
+ ; GCN: renamable $vgpr1 = nofpexcept V_RCP_F32_e32 $vgpr0, implicit $mode, implicit $exec
87
+ ; GCN: nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr1, implicit-def $vcc, implicit $mode, implicit $exec
88
+ ; GCN: $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
89
+ ; GCN: renamable $sgpr0_sgpr1 = S_XOR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
90
+ ; GCN: S_CBRANCH_EXECZ %bb.4, implicit $exec
91
+ ; GCN: bb.1.flow.preheader:
92
+ ; GCN: successors: %bb.2(0x80000000)
93
+ ; GCN: liveins: $vgpr0, $sgpr0_sgpr1
94
+ ; GCN: nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $mode, implicit $exec
95
+ ; GCN: renamable $sgpr2_sgpr3 = S_MOV_B64 0
96
+ ; GCN: bb.2.flow:
97
+ ; GCN: successors: %bb.3(0x04000000), %bb.2(0x7c000000)
98
+ ; GCN: liveins: $vcc, $sgpr0_sgpr1, $sgpr2_sgpr3
99
+ ; GCN: renamable $sgpr4_sgpr5 = S_AND_B64 $exec, renamable $vcc, implicit-def $scc
100
+ ; GCN: renamable $sgpr2_sgpr3 = S_OR_B64 killed renamable $sgpr4_sgpr5, killed renamable $sgpr2_sgpr3, implicit-def $scc
101
+ ; GCN: $exec = S_ANDN2_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
102
+ ; GCN: S_CBRANCH_EXECNZ %bb.2, implicit $exec
103
+ ; GCN: bb.3.Flow:
104
+ ; GCN: successors: %bb.4(0x80000000)
105
+ ; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
106
+ ; GCN: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
107
+ ; GCN: bb.4.Flow1:
108
+ ; GCN: successors: %bb.5(0x40000000), %bb.6(0x40000000)
109
+ ; GCN: liveins: $sgpr0_sgpr1
110
+ ; GCN: renamable $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def $scc, implicit $exec
111
+ ; GCN: $exec = S_XOR_B64 $exec, renamable $sgpr0_sgpr1, implicit-def $scc
112
+ ; GCN: S_CBRANCH_EXECZ %bb.6, implicit $exec
113
+ ; GCN: bb.5.kill0:
114
+ ; GCN: successors: %bb.6(0x80000000)
115
+ ; GCN: liveins: $sgpr0_sgpr1
116
+ ; GCN: $exec = S_MOV_B64 0
117
+ ; GCN: bb.6.end:
118
+ ; GCN: successors: %bb.7(0x40000000), %bb.8(0x40000000)
119
+ ; GCN: liveins: $sgpr0_sgpr1
120
+ ; GCN: $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc
121
+ ; GCN: S_CBRANCH_EXECZ %bb.7, implicit $exec
122
+ ; GCN: S_BRANCH %bb.8
123
+ ; GCN: bb.7:
124
+ ; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
125
+ ; GCN: S_ENDPGM 0
126
+ ; GCN: bb.8:
127
; Input IR starts here. %entry computes 1.0 / %val (fast-math flags attached)
; and goes to %kill0 when the quotient compares ordered-less-than 0.0,
; otherwise to %flow.
+ entry:
128
+ %.i0 = fdiv reassoc nnan nsz arcp contract afn float 1 .000000e+00 , %val
129
+ %cmp0 = fcmp olt float %.i0 , 0 .000000e+00
130
+ br i1 %cmp0 , label %kill0 , label %flow
131
+
132
+ kill0: ; preds = %entry
133
+ call void @llvm.amdgcn.kill (i1 false )
134
+ br label %end
135
+
136
; %flow branches back to itself when %val compares ordered-less-than 0.0 and
; to %end otherwise; this matches bb.2.flow listing itself as a successor in
; the expected output.
+ flow: ; preds = %entry
137
+ %cmp1 = fcmp olt float %val , 0 .000000e+00
138
+ br i1 %cmp1 , label %flow , label %end
139
+
140
; NOTE(review): no branch visible in this function targets %kill1 (%flow goes
; to %flow or %end), so the "preds = %flow" annotation on this block looks
; stale, and the expected-output lines contain no kill1 block. Confirm that
; leaving this block without predecessors is intended.
+ kill1: ; preds = %flow
141
+ call void @llvm.amdgcn.kill (i1 false )
142
+ br label %end
143
+
144
+ end: ; preds = %kill0, %kill1, %flow
145
+ ret { <4 x float > } undef
146
+ }
147
+
148
+ declare void @llvm.amdgcn.kill (i1 ) #0
149
+
81
150
attributes #0 = { nounwind }
0 commit comments