Skip to content

Commit e2490b7

Browse files
committed
[AMDGPU] New test case where it is better not to form FMA
1 parent 2697ffd commit e2490b7

File tree

1 file changed

+17
-0
lines changed

1 file changed

+17
-0
lines changed

llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,23 @@ define amdgpu_ps float @fmac_sequence_innermost_fmul_multiple_use(float inreg %a
257257
ret float %t7
258258
}
259259

260+
; "fmul %m, 2.0" could select to an FMA instruction, but it is no better than
261+
; selecting it as a multiply. In some cases the multiply is better because
262+
; SIFoldOperands can fold it into a previous instruction as an output modifier.
263+
; Test body: computes ((%x * %x) * 2.0) * float(%n) and returns it.
; The expected ISA below contains no FMA: the factor of two shows up as the
; "mul:2" output modifier on the int-to-float conversion, followed by two
; plain v_mul_f32 instructions.
; NOTE(review): attribute group #0 is defined outside this excerpt; confirm
; which function attributes (e.g. denormal mode) the expected output relies on.
define amdgpu_ps float @fma_vs_output_modifier(float %x, i32 %n) #0 {
264+
; GCN-LABEL: fma_vs_output_modifier:
265+
; GCN: ; %bb.0:
266+
; GCN-NEXT: v_cvt_f32_i32_e64 v1, v1 mul:2
267+
; GCN-NEXT: v_mul_f32_e32 v0, v0, v0
268+
; GCN-NEXT: v_mul_f32_e32 v0, v0, v1
269+
; GCN-NEXT: ; return to shader part epilog
270+
; Convert the integer argument to float. In the expected output this is the
; instruction that carries the mul:2 output modifier.
%s = sitofp i32 %n to float
271+
; Square %x; 'contract' permits fusing this multiply with a following add.
%m = fmul contract float %x, %x
272+
; The multiply by 2.0 under test: the one that could have become an FMA.
%a = fmul contract float %m, 2.0
273+
; Final product. Presumably 'reassoc' is what lets the 2.0 factor migrate onto
; %s so it can be folded as an output modifier; TODO confirm.
%r = fmul reassoc nsz float %a, %s
274+
ret float %r
275+
}
276+
260277
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
261278
declare float @llvm.maxnum.f32(float, float) #1
262279

0 commit comments

Comments
 (0)