5
5
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906
6
6
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-CONTRACT
7
7
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DENORM-CONTRACT
8
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -mattr="+dot7-insts,-dot10-insts" -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX906-DOT10-DISABLED
8
9
; (fadd (fmul S1.x, S2.x), (fadd (fmul S1.y, S2.y), z)) -> (fdot2 S1, S2, z)
9
10
10
11
; Tests to make sure fdot2 is not generated when vector elements of dot-product expressions
21
22
22
23
; GFX906-CONTRACT: v_mac_f16_e32
23
24
; GFX906-DENORM-CONTRACT: v_fma_f16
25
+ ; GFX906-DOT10-DISABLED: v_fma_f16
24
26
define amdgpu_kernel void @dotproduct_f16 (ptr addrspace (1 ) %src1 ,
25
27
ptr addrspace (1 ) %src2 ,
26
28
ptr addrspace (1 ) nocapture %dst ) {
@@ -44,8 +46,11 @@ entry:
44
46
}
45
47
46
48
47
- ; We only want to generate fdot2 if vector element of dot product is converted from f16 to f32
48
- ; and the vectors are of type <2 x half>
49
+ ; We only want to generate fdot2 if:
50
+ ; - the vector elements of the dot product are converted from f16 to f32, and
51
+ ; - the vectors are of type <2 x half>, and
52
+ ; - "dot10-insts" is enabled
53
+
49
54
; GCN-LABEL: {{^}}dotproduct_f16_f32
50
55
; GFX900: v_mad_mix_f32
51
56
; GFX900: v_mad_mix_f32
59
64
; GFX906-CONTRACT: v_dot2_f32_f16
60
65
61
66
; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
67
+ ; GFX906-DOT10-DISABLED: v_fma_mix_f32
62
68
define amdgpu_kernel void @dotproduct_f16_f32 (ptr addrspace (1 ) %src1 ,
63
69
ptr addrspace (1 ) %src2 ,
64
70
ptr addrspace (1 ) nocapture %dst ) {
@@ -85,8 +91,11 @@ entry:
85
91
ret void
86
92
}
87
93
88
- ; We only want to generate fdot2 if vector element of dot product is converted from f16 to f32
89
- ; and the vectors are of type <2 x half>
94
+ ; We only want to generate fdot2 if:
95
+ ; - the vector elements of the dot product are converted from f16 to f32, and
96
+ ; - the vectors are of type <2 x half>, and
97
+ ; - "dot10-insts" is enabled
98
+
90
99
; GCN-LABEL: {{^}}dotproduct_diffvecorder
91
100
; GFX900: v_mad_mix_f32
92
101
; GFX900: v_mad_mix_f32
@@ -99,6 +108,7 @@ entry:
99
108
100
109
; GFX906-CONTRACT: v_dot2_f32_f16
101
110
; GFX906-DENORM-CONTRACT: v_dot2_f32_f16
111
+ ; GFX906-DOT10-DISABLED: v_fma_mix_f32
102
112
define amdgpu_kernel void @dotproduct_diffvecorder (ptr addrspace (1 ) %src1 ,
103
113
ptr addrspace (1 ) %src2 ,
104
114
ptr addrspace (1 ) nocapture %dst ) {
@@ -136,6 +146,7 @@ entry:
136
146
137
147
; GFX906-CONTRACT: v_fma_mix_f32
138
148
; GFX906-DENORM-CONTRACT: v_fma_mix_f32
149
+ ; GFX906-DOT10-DISABLED: v_fma_mix_f32
139
150
define amdgpu_kernel void @dotproduct_v4f16 (ptr addrspace (1 ) %src1 ,
140
151
ptr addrspace (1 ) %src2 ,
141
152
ptr addrspace (1 ) nocapture %dst ) {
@@ -173,6 +184,7 @@ entry:
173
184
174
185
; GFX906-CONTRACT: v_fma_mix_f32
175
186
; GFX906-DENORM-CONTRACT: v_fma_mix_f32
187
+ ; GFX906-DOT10-DISABLED: v_fma_mix_f32
176
188
define amdgpu_kernel void @NotAdotproduct (ptr addrspace (1 ) %src1 ,
177
189
ptr addrspace (1 ) %src2 ,
178
190
ptr addrspace (1 ) nocapture %dst ) {
@@ -210,6 +222,7 @@ entry:
210
222
211
223
; GFX906-CONTRACT: v_fma_mix_f32
212
224
; GFX906-DENORM-CONTRACT: v_fma_mix_f32
225
+ ; GFX906-DOT10-DISABLED: v_fma_mix_f32
213
226
define amdgpu_kernel void @Diff_Idx_NotAdotproduct (ptr addrspace (1 ) %src1 ,
214
227
ptr addrspace (1 ) %src2 ,
215
228
ptr addrspace (1 ) nocapture %dst ) {
0 commit comments