Skip to content

Commit a2d086a

Browse files
authored
[AMDGPU] Fix FMA combine (#119217)
Update the check in the FMA combine to test for dot10-insts instead of dot7-insts. The target of the combine, v_dot2_f32_f16, is available only if the dot10-insts target feature is enabled. The issue probably dates back to the change that split dot10-insts out of dot7-insts. As far as I can see, this does not affect any current targets, but a future target that has dot7-insts but not dot10-insts would crash ("cannot select") on the input IR in the test.
1 parent e21ab4d commit a2d086a

File tree

2 files changed

+18
-5
lines changed

2 files changed

+18
-5
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14696,7 +14696,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
1469614696
EVT VT = N->getValueType(0);
1469714697
SDLoc SL(N);
1469814698

14699-
if (!Subtarget->hasDot7Insts() || VT != MVT::f32)
14699+
if (!Subtarget->hasDot10Insts() || VT != MVT::f32)
1470014700
return SDValue();
1470114701

1470214702
// FMA((F32)S0.x, (F32)S1.x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->

llvm/test/CodeGen/AMDGPU/fdot2.ll

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906
66
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT
77
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT
8+
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -mattr="+dot7-insts,-dot10-insts" -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DOT10-DISABLED
89
; (fadd (fmul S1.x, S2.x), (fadd (fmul S1.y, S2.y), z)) -> (fdot2 S1, S2, z)
910

1011
; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
@@ -21,6 +22,7 @@
2122

2223
; GFX906-CONTRACT: v_mac_f16_e32
2324
; GFX906-DENORM-CONTRACT: v_fma_f16
25+
; GFX906-DOT10-DISABLED: v_fma_f16
2426
define amdgpu_kernel void @dotproduct_f16(ptr addrspace(1) %src1,
2527
ptr addrspace(1) %src2,
2628
ptr addrspace(1) nocapture %dst) {
@@ -44,8 +46,11 @@ entry:
4446
}
4547

4648

47-
; We only want to generate fdot2 if vector element of dot product is converted from f16 to f32
48-
; and the vectors are of type <2 x half>
49+
; We only want to generate fdot2 if:
50+
; - vector element of dot product is converted from f16 to f32, and
51+
; - the vectors are of type <2 x half>, and
52+
; - "dot10-insts" is enabled
53+
4954
; GCN-LABEL: {{^}}dotproduct_f16_f32
5055
; GFX900: v_mad_mix_f32
5156
; GFX900: v_mad_mix_f32
@@ -59,6 +64,7 @@ entry:
5964
; GFX906-CONTRACT: v_dot2_f32_f16
6065

6166
; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
67+
; GFX906-DOT10-DISABLED: v_fma_mix_f32
6268
define amdgpu_kernel void @dotproduct_f16_f32(ptr addrspace(1) %src1,
6369
ptr addrspace(1) %src2,
6470
ptr addrspace(1) nocapture %dst) {
@@ -85,8 +91,11 @@ entry:
8591
ret void
8692
}
8793

88-
; We only want to generate fdot2 if vector element of dot product is converted from f16 to f32
89-
; and the vectors are of type <2 x half>
94+
; We only want to generate fdot2 if:
95+
; - vector element of dot product is converted from f16 to f32, and
96+
; - the vectors are of type <2 x half>, and
97+
; - "dot10-insts" is enabled
98+
9099
; GCN-LABEL: {{^}}dotproduct_diffvecorder
91100
; GFX900: v_mad_mix_f32
92101
; GFX900: v_mad_mix_f32
@@ -99,6 +108,7 @@ entry:
99108

100109
; GFX906-CONTRACT: v_dot2_f32_f16
101110
; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
111+
; GFX906-DOT10-DISABLED: v_fma_mix_f32
102112
define amdgpu_kernel void @dotproduct_diffvecorder(ptr addrspace(1) %src1,
103113
ptr addrspace(1) %src2,
104114
ptr addrspace(1) nocapture %dst) {
@@ -136,6 +146,7 @@ entry:
136146

137147
; GFX906-CONTRACT: v_fma_mix_f32
138148
; GFX906-DENORM-CONTRACT: v_fma_mix_f32
149+
; GFX906-DOT10-DISABLED: v_fma_mix_f32
139150
define amdgpu_kernel void @dotproduct_v4f16(ptr addrspace(1) %src1,
140151
ptr addrspace(1) %src2,
141152
ptr addrspace(1) nocapture %dst) {
@@ -173,6 +184,7 @@ entry:
173184

174185
; GFX906-CONTRACT: v_fma_mix_f32
175186
; GFX906-DENORM-CONTRACT: v_fma_mix_f32
187+
; GFX906-DOT10-DISABLED: v_fma_mix_f32
176188
define amdgpu_kernel void @NotAdotproduct(ptr addrspace(1) %src1,
177189
ptr addrspace(1) %src2,
178190
ptr addrspace(1) nocapture %dst) {
@@ -210,6 +222,7 @@ entry:
210222

211223
; GFX906-CONTRACT: v_fma_mix_f32
212224
; GFX906-DENORM-CONTRACT: v_fma_mix_f32
225+
; GFX906-DOT10-DISABLED: v_fma_mix_f32
213226
define amdgpu_kernel void @Diff_Idx_NotAdotproduct(ptr addrspace(1) %src1,
214227
ptr addrspace(1) %src2,
215228
ptr addrspace(1) nocapture %dst) {

0 commit comments

Comments
 (0)