Skip to content

Commit fe5b9a6

Browse files
committed
AMDGPU/GlobalISel: Make strict fadd, fmul and fma legal
1 parent e78a43d commit fe5b9a6

File tree

6 files changed

+383
-1
lines changed

6 files changed

+383
-1
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4218,6 +4218,9 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
42184218
case G_SSUBO:
42194219
case G_SADDE:
42204220
case G_SSUBE:
4221+
case G_STRICT_FADD:
4222+
case G_STRICT_FMUL:
4223+
case G_STRICT_FMA:
42214224
return fewerElementsVectorMultiEltType(GMI, NumElts);
42224225
case G_ICMP:
42234226
case G_FCMP:
@@ -4833,6 +4836,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
48334836
return Legalized;
48344837
}
48354838
case TargetOpcode::G_FMA:
4839+
case TargetOpcode::G_STRICT_FMA:
48364840
case TargetOpcode::G_FSHR:
48374841
case TargetOpcode::G_FSHL: {
48384842
Observer.changingInstr(MI);

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -702,7 +702,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
702702
getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({CodePtr});
703703

704704
auto &FPOpActions = getActionDefinitionsBuilder(
705-
{ G_FADD, G_FMUL, G_FMA, G_FCANONICALIZE})
705+
{ G_FADD, G_FMUL, G_FMA, G_FCANONICALIZE,
706+
G_STRICT_FADD, G_STRICT_FMUL, G_STRICT_FMA})
706707
.legalFor({S32, S64});
707708
auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS})
708709
.customFor({S32, S64});

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3710,6 +3710,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
37103710
case AMDGPU::G_FMAXNUM_IEEE:
37113711
case AMDGPU::G_FCANONICALIZE:
37123712
case AMDGPU::G_INTRINSIC_TRUNC:
3713+
case AMDGPU::G_STRICT_FADD:
3714+
case AMDGPU::G_STRICT_FMUL:
3715+
case AMDGPU::G_STRICT_FMA:
37133716
case AMDGPU::G_BSWAP: // TODO: Somehow expand for scalar?
37143717
case AMDGPU::G_FSHR: // TODO: Expand for scalar
37153718
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
3+
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
4+
5+
define half @v_constained_fma_f16_fpexcept_strict(half %x, half %y, half %z) #0 {
6+
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict:
7+
; GCN: ; %bb.0:
8+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9+
; GCN-NEXT: v_fma_f16 v0, v0, v1, v2
10+
; GCN-NEXT: s_setpc_b64 s[30:31]
11+
%val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
12+
ret half %val
13+
}
14+
15+
define <2 x half> @v_constained_fma_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 {
16+
; GFX9-LABEL: v_constained_fma_v2f16_fpexcept_strict:
17+
; GFX9: ; %bb.0:
18+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19+
; GFX9-NEXT: v_pk_fma_f16 v0, v0, v1, v2
20+
; GFX9-NEXT: s_setpc_b64 s[30:31]
21+
;
22+
; GFX8-LABEL: v_constained_fma_v2f16_fpexcept_strict:
23+
; GFX8: ; %bb.0:
24+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25+
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
26+
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
27+
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
28+
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
29+
; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
30+
; GFX8-NEXT: v_mov_b32_e32 v2, 16
31+
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
32+
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
33+
; GFX8-NEXT: s_setpc_b64 s[30:31]
34+
%val = call <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
35+
ret <2 x half> %val
36+
}
37+
38+
define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y, <3 x half> %z) #0 {
39+
; GFX9-LABEL: v_constained_fma_v3f16_fpexcept_strict:
40+
; GFX9: ; %bb.0:
41+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42+
; GFX9-NEXT: v_pk_fma_f16 v0, v0, v2, v4
43+
; GFX9-NEXT: v_pk_fma_f16 v1, v1, v3, v5
44+
; GFX9-NEXT: s_setpc_b64 s[30:31]
45+
;
46+
; GFX8-LABEL: v_constained_fma_v3f16_fpexcept_strict:
47+
; GFX8: ; %bb.0:
48+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49+
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0
50+
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v2
51+
; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v4
52+
; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
53+
; GFX8-NEXT: v_fma_f16 v2, v6, v7, v8
54+
; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
55+
; GFX8-NEXT: v_mov_b32_e32 v3, 16
56+
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
57+
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
58+
; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 16
59+
; GFX8-NEXT: s_setpc_b64 s[30:31]
60+
%val = call <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
61+
ret <3 x half> %val
62+
}
63+
64+
define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y, <4 x half> %z) #0 {
65+
; GFX9-LABEL: v_constained_fma_v4f16_fpexcept_strict:
66+
; GFX9: ; %bb.0:
67+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68+
; GFX9-NEXT: v_pk_fma_f16 v0, v0, v2, v4
69+
; GFX9-NEXT: v_pk_fma_f16 v1, v1, v3, v5
70+
; GFX9-NEXT: s_setpc_b64 s[30:31]
71+
;
72+
; GFX8-LABEL: v_constained_fma_v4f16_fpexcept_strict:
73+
; GFX8: ; %bb.0:
74+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75+
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0
76+
; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v2
77+
; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v4
78+
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v1
79+
; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v3
80+
; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v5
81+
; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
82+
; GFX8-NEXT: v_fma_f16 v2, v6, v8, v10
83+
; GFX8-NEXT: v_mov_b32_e32 v4, 16
84+
; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
85+
; GFX8-NEXT: v_fma_f16 v3, v7, v9, v11
86+
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
87+
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
88+
; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
89+
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
90+
; GFX8-NEXT: s_setpc_b64 s[30:31]
91+
%val = call <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
92+
ret <4 x half> %val
93+
}
94+
95+
define half @v_constained_fma_f16_fpexcept_strict_fneg(half %x, half %y, half %z) #0 {
96+
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fneg:
97+
; GCN: ; %bb.0:
98+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
99+
; GCN-NEXT: v_fma_f16 v0, v0, v1, -v2
100+
; GCN-NEXT: s_setpc_b64 s[30:31]
101+
%neg.z = fneg half %z
102+
%val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %neg.z, metadata !"round.tonearest", metadata !"fpexcept.strict")
103+
ret half %val
104+
}
105+
106+
define half @v_constained_fma_f16_fpexcept_strict_fneg_fneg(half %x, half %y, half %z) #0 {
107+
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fneg_fneg:
108+
; GCN: ; %bb.0:
109+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110+
; GCN-NEXT: v_fma_f16 v0, -v0, -v1, v2
111+
; GCN-NEXT: s_setpc_b64 s[30:31]
112+
%neg.x = fneg half %x
113+
%neg.y = fneg half %y
114+
%val = call half @llvm.experimental.constrained.fma.f16(half %neg.x, half %neg.y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
115+
ret half %val
116+
}
117+
118+
define half @v_constained_fma_f16_fpexcept_strict_fabs_fabs(half %x, half %y, half %z) #0 {
119+
; GCN-LABEL: v_constained_fma_f16_fpexcept_strict_fabs_fabs:
120+
; GCN: ; %bb.0:
121+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122+
; GCN-NEXT: v_fma_f16 v0, |v0|, |v1|, v2
123+
; GCN-NEXT: s_setpc_b64 s[30:31]
124+
%neg.x = call half @llvm.fabs.f16(half %x)
125+
%neg.y = call half @llvm.fabs.f16(half %y)
126+
%val = call half @llvm.experimental.constrained.fma.f16(half %neg.x, half %neg.y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
127+
ret half %val
128+
}
129+
130+
define <2 x half> @v_constained_fma_v2f16_fpexcept_strict_fneg_fneg(<2 x half> %x, <2 x half> %y, <2 x half> %z) #0 {
131+
; GFX9-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:
132+
; GFX9: ; %bb.0:
133+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134+
; GFX9-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,1,0] neg_hi:[1,1,0]
135+
; GFX9-NEXT: s_setpc_b64 s[30:31]
136+
;
137+
; GFX8-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:
138+
; GFX8: ; %bb.0:
139+
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140+
; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
141+
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
142+
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
143+
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
144+
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
145+
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
146+
; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
147+
; GFX8-NEXT: v_mov_b32_e32 v2, 16
148+
; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
149+
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
150+
; GFX8-NEXT: s_setpc_b64 s[30:31]
151+
%neg.x = fneg <2 x half> %x
152+
%neg.y = fneg <2 x half> %y
153+
%val = call <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half> %neg.x, <2 x half> %neg.y, <2 x half> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
154+
ret <2 x half> %val
155+
}
156+
157+
declare half @llvm.fabs.f16(half) #1
158+
declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata, metadata) #1
159+
declare <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, metadata, metadata) #1
160+
declare <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half>, <3 x half>, <3 x half>, metadata, metadata) #1
161+
declare <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, metadata, metadata) #1
162+
163+
attributes #0 = { strictfp }
164+
attributes #1 = { inaccessiblememonly nounwind willreturn }
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
3+
4+
define float @v_constained_fma_f32_fpexcept_strict(float %x, float %y, float %z) #0 {
5+
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict:
6+
; GCN: ; %bb.0:
7+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8+
; GCN-NEXT: v_fma_f32 v0, v0, v1, v2
9+
; GCN-NEXT: s_setpc_b64 s[30:31]
10+
%val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
11+
ret float %val
12+
}
13+
14+
define <2 x float> @v_constained_fma_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
15+
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict:
16+
; GCN: ; %bb.0:
17+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18+
; GCN-NEXT: v_fma_f32 v0, v0, v2, v4
19+
; GCN-NEXT: v_fma_f32 v1, v1, v3, v5
20+
; GCN-NEXT: s_setpc_b64 s[30:31]
21+
%val = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
22+
ret <2 x float> %val
23+
}
24+
25+
define <3 x float> @v_constained_fma_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y, <3 x float> %z) #0 {
26+
; GCN-LABEL: v_constained_fma_v3f32_fpexcept_strict:
27+
; GCN: ; %bb.0:
28+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29+
; GCN-NEXT: v_fma_f32 v0, v0, v3, v6
30+
; GCN-NEXT: v_fma_f32 v1, v1, v4, v7
31+
; GCN-NEXT: v_fma_f32 v2, v2, v5, v8
32+
; GCN-NEXT: s_setpc_b64 s[30:31]
33+
%val = call <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float> %x, <3 x float> %y, <3 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
34+
ret <3 x float> %val
35+
}
36+
37+
define <4 x float> @v_constained_fma_v4f32_fpexcept_strict(<4 x float> %x, <4 x float> %y, <4 x float> %z) #0 {
38+
; GCN-LABEL: v_constained_fma_v4f32_fpexcept_strict:
39+
; GCN: ; %bb.0:
40+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41+
; GCN-NEXT: v_fma_f32 v0, v0, v4, v8
42+
; GCN-NEXT: v_fma_f32 v1, v1, v5, v9
43+
; GCN-NEXT: v_fma_f32 v2, v2, v6, v10
44+
; GCN-NEXT: v_fma_f32 v3, v3, v7, v11
45+
; GCN-NEXT: s_setpc_b64 s[30:31]
46+
%val = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
47+
ret <4 x float> %val
48+
}
49+
50+
define float @v_constained_fma_f32_fpexcept_strict_fneg(float %x, float %y, float %z) #0 {
51+
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg:
52+
; GCN: ; %bb.0:
53+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54+
; GCN-NEXT: v_fma_f32 v0, v0, v1, -v2
55+
; GCN-NEXT: s_setpc_b64 s[30:31]
56+
%neg.z = fneg float %z
57+
%val = call float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %neg.z, metadata !"round.tonearest", metadata !"fpexcept.strict")
58+
ret float %val
59+
}
60+
61+
define float @v_constained_fma_f32_fpexcept_strict_fneg_fneg(float %x, float %y, float %z) #0 {
62+
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fneg_fneg:
63+
; GCN: ; %bb.0:
64+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65+
; GCN-NEXT: v_fma_f32 v0, -v0, -v1, v2
66+
; GCN-NEXT: s_setpc_b64 s[30:31]
67+
%neg.x = fneg float %x
68+
%neg.y = fneg float %y
69+
%val = call float @llvm.experimental.constrained.fma.f32(float %neg.x, float %neg.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
70+
ret float %val
71+
}
72+
73+
define float @v_constained_fma_f32_fpexcept_strict_fabs_fabs(float %x, float %y, float %z) #0 {
74+
; GCN-LABEL: v_constained_fma_f32_fpexcept_strict_fabs_fabs:
75+
; GCN: ; %bb.0:
76+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77+
; GCN-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
78+
; GCN-NEXT: s_setpc_b64 s[30:31]
79+
%neg.x = call float @llvm.fabs.f32(float %x)
80+
%neg.y = call float @llvm.fabs.f32(float %y)
81+
%val = call float @llvm.experimental.constrained.fma.f32(float %neg.x, float %neg.y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
82+
ret float %val
83+
}
84+
85+
define <2 x float> @v_constained_fma_v2f32_fpexcept_strict_fneg_fneg(<2 x float> %x, <2 x float> %y, <2 x float> %z) #0 {
86+
; GCN-LABEL: v_constained_fma_v2f32_fpexcept_strict_fneg_fneg:
87+
; GCN: ; %bb.0:
88+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89+
; GCN-NEXT: v_fma_f32 v0, -v0, -v2, v4
90+
; GCN-NEXT: v_fma_f32 v1, -v1, -v3, v5
91+
; GCN-NEXT: s_setpc_b64 s[30:31]
92+
%neg.x = fneg <2 x float> %x
93+
%neg.y = fneg <2 x float> %y
94+
%val = call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %neg.x, <2 x float> %neg.y, <2 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict")
95+
ret <2 x float> %val
96+
}
97+
98+
declare float @llvm.fabs.f32(float) #1
99+
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) #1
100+
declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata) #1
101+
declare <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float>, <3 x float>, <3 x float>, metadata, metadata) #1
102+
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata) #1
103+
104+
attributes #0 = { strictfp }
105+
attributes #1 = { inaccessiblememonly nounwind willreturn }

0 commit comments

Comments
 (0)