File tree Expand file tree Collapse file tree 1 file changed +17
-0
lines changed Expand file tree Collapse file tree 1 file changed +17
-0
lines changed Original file line number Diff line number Diff line change @@ -257,6 +257,23 @@ define amdgpu_ps float @fmac_sequence_innermost_fmul_multiple_use(float inreg %a
257
257
ret float %t7
258
258
}
259
259
260
; "fmul %m, 2.0" could select to an FMA instruction, but it is no better than
; selecting it as a multiply. In some cases the multiply is better because
; SIFoldOperands can fold it into a previous instruction as an output modifier.
;
; What the expected output below pins down:
;   * no FMA is emitted; only two plain v_mul_f32 instructions remain;
;   * the factor of two appears as the "mul:2" output modifier on the
;     v_cvt_f32_i32 that converts %n, rather than as a separate instruction.
define amdgpu_ps float @fma_vs_output_modifier(float %x, i32 %n) #0 {
; GCN-LABEL: fma_vs_output_modifier:
; GCN: ; %bb.0:
; GCN-NEXT: v_cvt_f32_i32_e64 v1, v1 mul:2
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-NEXT: ; return to shader part epilog
  ; %s = (float)%n -- the conversion that carries "mul:2" in the expected output.
  %s = sitofp i32 %n to float
  ; %m = %x * %x
  %m = fmul contract float %x, %x
  ; The multiply by 2.0 under test (see the comment above the function).
  %a = fmul contract float %m, 2.0
  ; NOTE(review): presumably the reassoc flag here is what lets the 2.0 factor
  ; be moved onto %s so it can be folded into the conversion -- confirm.
  %r = fmul reassoc nsz float %a, %s
  ret float %r
}
276
+
260
277
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare float @llvm.maxnum.f32(float, float) #1
262
279
You can’t perform that action at this time.
0 commit comments