@@ -38,6 +38,16 @@ define <16 x float> @test2(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
38
38
ret <16 x float > %neg
39
39
}
40
40
41
; Negation applied to the result of an nsz-flagged llvm.fma call on
; <16 x float>. The CHECK lines expect the negation to be absorbed into a
; single vfnmsub213ps, i.e. -(a * b) - c, with no separate sign-flip.
define <16 x float> @test2_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test2_nsz:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT: retq
  ; fused = a * b + c, computed with the nsz fast-math flag on the call.
  %fused = call nsz <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
  ; Flip the sign of the whole fused result.
  %negated = fneg <16 x float> %fused
  ret <16 x float> %negated
}
50
+
41
51
define <16 x float > @test3 (<16 x float > %a , <16 x float > %b , <16 x float > %c ) {
42
52
; CHECK-LABEL: test3:
43
53
; CHECK: # %bb.0:
@@ -49,6 +59,17 @@ define <16 x float> @test3(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
49
59
ret <16 x float > %sub.i
50
60
}
51
61
62
; Negates one multiplicand before an nsz-flagged llvm.fma call and then
; negates the fused result: -(a * -b + c). The CHECK lines expect both
; negations to fold into a single vfmsub213ps, i.e. (a * b) - c.
define <16 x float> @test3_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test3_nsz:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
; CHECK-NEXT: retq
  ; Inner negation: feeds the second multiplicand of the fma.
  %neg_b = fneg <16 x float> %b
  %fused = call nsz <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %neg_b, <16 x float> %c)
  ; Outer negation of the fused result.
  %result = fneg <16 x float> %fused
  ret <16 x float> %result
}
72
+
52
73
define <16 x float > @test4 (<16 x float > %a , <16 x float > %b , <16 x float > %c ) {
53
74
; CHECK-LABEL: test4:
54
75
; CHECK: # %bb.0:
@@ -61,6 +82,18 @@ define <16 x float> @test4(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
61
82
ret <16 x float > %sub.i
62
83
}
63
84
85
; Negates one multiplicand and the addend before an nsz-flagged llvm.fma
; call, then negates the fused result via the `fsub -0.0, x` form:
; -(a * -b + -c). The CHECK lines expect everything to fold into a single
; vfmadd213ps, i.e. (a * b) + c.
define <16 x float> @test4_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test4_nsz:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT: retq
  %t0 = fneg <16 x float> %b
  %t1 = fneg <16 x float> %c
  %t2 = call nsz <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %t0, <16 x float> %t1)
  ; Outer negation spelled as a subtraction from a -0.0 splat (all 16 lanes),
  ; rather than as an fneg instruction.
  %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t2
  ret <16 x float> %sub.i
}
96
+
64
97
define <16 x float > @test5 (<16 x float > %a , <16 x float > %b , <16 x float > %c ) {
65
98
; CHECK-LABEL: test5:
66
99
; CHECK: # %bb.0: # %entry
@@ -84,6 +117,18 @@ define <16 x float> @test6(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
84
117
ret <16 x float > %sub.i
85
118
}
86
119
120
; Same shape as a double-negated-operand FMA followed by an outer negation,
; but using the x86-specific @llvm.x86.avx512.vfmadd.ps.512 intrinsic with a
; trailing i32 10 operand; the CHECK lines show that call being emitted with
; the {ru-sae} modifier. The expected output is a single vfmadd213ps.
;
; NOTE(review): the expected instruction keeps {ru-sae} after the outer
; negation has been folded into the operands. Negating a round-up result is
; in general a round-down of the negated expression, so please confirm this
; expectation is intended and not merely a record of current llc output.
define <16 x float> @test6_nsz(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
; CHECK-LABEL: test6_nsz:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0
; CHECK-NEXT: retq
  %t0 = fneg <16 x float> %b
  %t1 = fneg <16 x float> %c
  %t2 = call nsz <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %t0, <16 x float> %t1, i32 10)
  ; Outer negation spelled as a subtraction from a -0.0 splat (all 16 lanes).
  %sub.i = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t2
  ret <16 x float> %sub.i
}
131
+
87
132
define <8 x float > @test7 (<8 x float > %a , <8 x float > %b , <8 x float > %c ) {
88
133
; CHECK-LABEL: test7:
89
134
; CHECK: # %bb.0:
@@ -95,6 +140,17 @@ define <8 x float> @test7(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
95
140
ret <8 x float > %sub.i
96
141
}
97
142
143
; 256-bit (<8 x float>) variant: negates the addend before an nsz-flagged
; llvm.fma call, then negates the fused result via `fsub -0.0, x`:
; -(a * b + -c). The CHECK lines expect a single vfnmadd213ps on ymm
; registers, i.e. -(a * b) + c.
define <8 x float> @test7_nsz(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test7_nsz:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-NEXT: retq
  %t0 = fneg <8 x float> %c
  %t1 = call nsz <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %t0)
  ; Outer negation spelled as a subtraction from a -0.0 splat (all 8 lanes).
  %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t1
  ret <8 x float> %sub.i
}
153
+
98
154
define <8 x float > @test8 (<8 x float > %a , <8 x float > %b , <8 x float > %c ) {
99
155
; CHECK-LABEL: test8:
100
156
; CHECK: # %bb.0: # %entry
@@ -116,6 +172,16 @@ define <8 x double> @test9(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
116
172
ret <8 x double > %sub.i
117
173
}
118
174
175
; Double-precision variant: negates the result of an nsz-flagged call to
; @llvm.x86.avx512.vfmadd.pd.512 (trailing operand i32 4). The CHECK lines
; expect a single vfnmsub213pd with no rounding modifier, i.e. -(a * b) - c.
define <8 x double> @test9_nsz(<8 x double> %a, <8 x double> %b, <8 x double> %c) {
; CHECK-LABEL: test9_nsz:
; CHECK: # %bb.0:
; CHECK-NEXT: vfnmsub213pd {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT: retq
  %fused = tail call nsz <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i32 4)
  ; Flip the sign of the whole fused result.
  %flipped = fneg <8 x double> %fused
  ret <8 x double> %flipped
}
184
+
119
185
define <2 x double > @test10 (<2 x double > %a , <2 x double > %b , <2 x double > %c ) {
120
186
; CHECK-LABEL: test10:
121
187
; CHECK: # %bb.0: # %entry
0 commit comments