Skip to content

Commit 57e8f84

Browse files
committed
[X86][FP16] Fix a bug when combining FADD(A, FMA(B, C, 0)) into FMA(B, C, A).
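This bug was introduced by D109953. The operands of the generated FMA node were passed in the wrong order: the addend (FAddOp1) was passed first, ahead of the two multiplicands, instead of last. Differential Revision: https://reviews.llvm.org/D110606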
1 parent aa061dd commit 57e8f84

File tree

4 files changed

+26
-51
lines changed

4 files changed

+26
-51
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47795,7 +47795,7 @@ static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
4779547795
// FIXME: How do we handle when fast math flags of FADD are different from
4779647796
// CFMUL's?
4779747797
SDValue CFmul =
47798-
DAG.getNode(NewOp, SDLoc(N), CVT, FAddOp1, MulOp0, MulOp1, N->getFlags());
47798+
DAG.getNode(NewOp, SDLoc(N), CVT, MulOp0, MulOp1, FAddOp1, N->getFlags());
4779947799
return DAG.getBitcast(VT, CFmul);
4780047800
}
4780147801

llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@
66
define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
77
; NO-SZ-LABEL: test1:
88
; NO-SZ: # %bb.0: # %entry
9-
; NO-SZ-NEXT: vfcmaddcph %zmm1, %zmm0, %zmm2
10-
; NO-SZ-NEXT: vmovaps %zmm2, %zmm0
9+
; NO-SZ-NEXT: vfcmaddcph %zmm2, %zmm1, %zmm0
1110
; NO-SZ-NEXT: retq
1211
;
1312
; HAS-SZ-LABEL: test1:
@@ -28,8 +27,7 @@ entry:
2827
define dso_local <32 x half> @test2(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
2928
; NO-SZ-LABEL: test2:
3029
; NO-SZ: # %bb.0: # %entry
31-
; NO-SZ-NEXT: vfmaddcph %zmm1, %zmm0, %zmm2
32-
; NO-SZ-NEXT: vmovaps %zmm2, %zmm0
30+
; NO-SZ-NEXT: vfmaddcph %zmm2, %zmm1, %zmm0
3331
; NO-SZ-NEXT: retq
3432
;
3533
; HAS-SZ-LABEL: test2:
@@ -50,8 +48,7 @@ entry:
5048
define dso_local <16 x half> @test3(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
5149
; NO-SZ-LABEL: test3:
5250
; NO-SZ: # %bb.0: # %entry
53-
; NO-SZ-NEXT: vfcmaddcph %ymm1, %ymm0, %ymm2
54-
; NO-SZ-NEXT: vmovaps %ymm2, %ymm0
51+
; NO-SZ-NEXT: vfcmaddcph %ymm2, %ymm1, %ymm0
5552
; NO-SZ-NEXT: retq
5653
;
5754
; HAS-SZ-LABEL: test3:
@@ -72,8 +69,7 @@ entry:
7269
define dso_local <16 x half> @test4(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
7370
; NO-SZ-LABEL: test4:
7471
; NO-SZ: # %bb.0: # %entry
75-
; NO-SZ-NEXT: vfmaddcph %ymm1, %ymm0, %ymm2
76-
; NO-SZ-NEXT: vmovaps %ymm2, %ymm0
72+
; NO-SZ-NEXT: vfmaddcph %ymm2, %ymm1, %ymm0
7773
; NO-SZ-NEXT: retq
7874
;
7975
; HAS-SZ-LABEL: test4:
@@ -94,8 +90,7 @@ entry:
9490
define dso_local <8 x half> @test5(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
9591
; NO-SZ-LABEL: test5:
9692
; NO-SZ: # %bb.0: # %entry
97-
; NO-SZ-NEXT: vfcmaddcph %xmm1, %xmm0, %xmm2
98-
; NO-SZ-NEXT: vmovaps %xmm2, %xmm0
93+
; NO-SZ-NEXT: vfcmaddcph %xmm2, %xmm1, %xmm0
9994
; NO-SZ-NEXT: retq
10095
;
10196
; HAS-SZ-LABEL: test5:
@@ -116,8 +111,7 @@ entry:
116111
define dso_local <8 x half> @test6(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
117112
; NO-SZ-LABEL: test6:
118113
; NO-SZ: # %bb.0: # %entry
119-
; NO-SZ-NEXT: vfmaddcph %xmm1, %xmm0, %xmm2
120-
; NO-SZ-NEXT: vmovaps %xmm2, %xmm0
114+
; NO-SZ-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0
121115
; NO-SZ-NEXT: retq
122116
;
123117
; HAS-SZ-LABEL: test6:
@@ -139,8 +133,7 @@ entry:
139133
define dso_local <32 x half> @test13(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
140134
; CHECK-LABEL: test13:
141135
; CHECK: # %bb.0: # %entry
142-
; CHECK-NEXT: vfcmaddcph %zmm1, %zmm0, %zmm2
143-
; CHECK-NEXT: vmovaps %zmm2, %zmm0
136+
; CHECK-NEXT: vfcmaddcph %zmm2, %zmm1, %zmm0
144137
; CHECK-NEXT: retq
145138
entry:
146139
%0 = bitcast <32 x half> %a to <16 x float>
@@ -154,8 +147,7 @@ entry:
154147
define dso_local <32 x half> @test14(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
155148
; CHECK-LABEL: test14:
156149
; CHECK: # %bb.0: # %entry
157-
; CHECK-NEXT: vfmaddcph %zmm1, %zmm0, %zmm2
158-
; CHECK-NEXT: vmovaps %zmm2, %zmm0
150+
; CHECK-NEXT: vfmaddcph %zmm2, %zmm1, %zmm0
159151
; CHECK-NEXT: retq
160152
entry:
161153
%0 = bitcast <32 x half> %a to <16 x float>
@@ -169,8 +161,7 @@ entry:
169161
define dso_local <16 x half> @test15(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
170162
; CHECK-LABEL: test15:
171163
; CHECK: # %bb.0: # %entry
172-
; CHECK-NEXT: vfcmaddcph %ymm1, %ymm0, %ymm2
173-
; CHECK-NEXT: vmovaps %ymm2, %ymm0
164+
; CHECK-NEXT: vfcmaddcph %ymm2, %ymm1, %ymm0
174165
; CHECK-NEXT: retq
175166
entry:
176167
%0 = bitcast <16 x half> %a to <8 x float>
@@ -184,8 +175,7 @@ entry:
184175
define dso_local <16 x half> @test16(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
185176
; CHECK-LABEL: test16:
186177
; CHECK: # %bb.0: # %entry
187-
; CHECK-NEXT: vfmaddcph %ymm1, %ymm0, %ymm2
188-
; CHECK-NEXT: vmovaps %ymm2, %ymm0
178+
; CHECK-NEXT: vfmaddcph %ymm2, %ymm1, %ymm0
189179
; CHECK-NEXT: retq
190180
entry:
191181
%0 = bitcast <16 x half> %a to <8 x float>
@@ -199,8 +189,7 @@ entry:
199189
define dso_local <8 x half> @test17(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
200190
; CHECK-LABEL: test17:
201191
; CHECK: # %bb.0: # %entry
202-
; CHECK-NEXT: vfcmaddcph %xmm1, %xmm0, %xmm2
203-
; CHECK-NEXT: vmovaps %xmm2, %xmm0
192+
; CHECK-NEXT: vfcmaddcph %xmm2, %xmm1, %xmm0
204193
; CHECK-NEXT: retq
205194
entry:
206195
%0 = bitcast <8 x half> %a to <4 x float>
@@ -214,8 +203,7 @@ entry:
214203
define dso_local <8 x half> @test18(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
215204
; CHECK-LABEL: test18:
216205
; CHECK: # %bb.0: # %entry
217-
; CHECK-NEXT: vfmaddcph %xmm1, %xmm0, %xmm2
218-
; CHECK-NEXT: vmovaps %xmm2, %xmm0
206+
; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0
219207
; CHECK-NEXT: retq
220208
entry:
221209
%0 = bitcast <8 x half> %a to <4 x float>

llvm/test/CodeGen/X86/avx512fp16-combine-vfmulc-fadd.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce, <32 x half> %rhs.coerce) {
55
; CHECK-LABEL: test1:
66
; CHECK: # %bb.0: # %entry
7-
; CHECK-NEXT: vfmaddcph %zmm1, %zmm0, %zmm2
8-
; CHECK-NEXT: vmovaps %zmm2, %zmm0
7+
; CHECK-NEXT: vfmaddcph %zmm2, %zmm1, %zmm0
98
; CHECK-NEXT: retq
109
entry:
1110
%0 = bitcast <32 x half> %lhs.coerce to <16 x float>
@@ -19,8 +18,7 @@ entry:
1918
define dso_local <16 x half> @test2(<16 x half> %acc.coerce, <16 x half> %lhs.coerce, <16 x half> %rhs.coerce) {
2019
; CHECK-LABEL: test2:
2120
; CHECK: # %bb.0: # %entry
22-
; CHECK-NEXT: vfmaddcph %ymm1, %ymm0, %ymm2
23-
; CHECK-NEXT: vmovaps %ymm2, %ymm0
21+
; CHECK-NEXT: vfmaddcph %ymm2, %ymm1, %ymm0
2422
; CHECK-NEXT: retq
2523
entry:
2624
%0 = bitcast <16 x half> %lhs.coerce to <8 x float>
@@ -34,8 +32,7 @@ entry:
3432
define dso_local <8 x half> @test3(<8 x half> %acc.coerce, <8 x half> %lhs.coerce, <8 x half> %rhs.coerce) {
3533
; CHECK-LABEL: test3:
3634
; CHECK: # %bb.0: # %entry
37-
; CHECK-NEXT: vfmaddcph %xmm1, %xmm0, %xmm2
38-
; CHECK-NEXT: vmovaps %xmm2, %xmm0
35+
; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0
3936
; CHECK-NEXT: retq
4037
entry:
4138
%0 = bitcast <8 x half> %lhs.coerce to <4 x float>
@@ -50,8 +47,7 @@ entry:
5047
define dso_local <8 x half> @test4(<8 x half> %acc.coerce, <8 x half> %lhs.coerce, <8 x half> %rhs.coerce) {
5148
; CHECK-LABEL: test4:
5249
; CHECK: # %bb.0: # %entry
53-
; CHECK-NEXT: vfmaddcph %xmm1, %xmm0, %xmm2
54-
; CHECK-NEXT: vmovaps %xmm2, %xmm0
50+
; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0
5551
; CHECK-NEXT: retq
5652
entry:
5753
%0 = bitcast <8 x half> %lhs.coerce to <4 x float>

llvm/test/CodeGen/X86/avx512fp16-combine-xor-vfmulc-fadd.ll

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
define dso_local <32 x half> @test1(<32 x half> %acc.coerce, <32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 {
55
; CHECK-LABEL: test1:
66
; CHECK: # %bb.0: # %entry
7-
; CHECK-NEXT: vfcmaddcph %zmm2, %zmm0, %zmm1
8-
; CHECK-NEXT: vmovaps %zmm1, %zmm0
7+
; CHECK-NEXT: vfcmaddcph %zmm1, %zmm2, %zmm0
98
; CHECK-NEXT: retq
109
entry:
1110
%0 = bitcast <32 x half> %lhs.coerce.conj to <16 x i32>
@@ -21,8 +20,7 @@ entry:
2120
define dso_local <32 x half> @test2(<32 x half> %acc.coerce, <32 x half> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 {
2221
; CHECK-LABEL: test2:
2322
; CHECK: # %bb.0: # %entry
24-
; CHECK-NEXT: vfcmaddcph %zmm2, %zmm0, %zmm1
25-
; CHECK-NEXT: vmovaps %zmm1, %zmm0
23+
; CHECK-NEXT: vfcmaddcph %zmm1, %zmm2, %zmm0
2624
; CHECK-NEXT: retq
2725
entry:
2826
%0 = bitcast <32 x half> %lhs.coerce.conj to <16 x i32>
@@ -38,8 +36,7 @@ entry:
3836
define dso_local <16 x half> @test3(<16 x half> %acc.coerce, <16 x half> %lhs.coerce.conj, <16 x half> %rhs.coerce) local_unnamed_addr #0 {
3937
; CHECK-LABEL: test3:
4038
; CHECK: # %bb.0: # %entry
41-
; CHECK-NEXT: vfcmaddcph %ymm2, %ymm0, %ymm1
42-
; CHECK-NEXT: vmovaps %ymm1, %ymm0
39+
; CHECK-NEXT: vfcmaddcph %ymm1, %ymm2, %ymm0
4340
; CHECK-NEXT: retq
4441
entry:
4542
%0 = bitcast <16 x half> %lhs.coerce.conj to <8 x i32>
@@ -55,8 +52,7 @@ entry:
5552
define dso_local <8 x half> @test4(<8 x half> %acc.coerce, <8 x half> %lhs.coerce.conj, <8 x half> %rhs.coerce) local_unnamed_addr #0 {
5653
; CHECK-LABEL: test4:
5754
; CHECK: # %bb.0: # %entry
58-
; CHECK-NEXT: vfcmaddcph %xmm2, %xmm0, %xmm1
59-
; CHECK-NEXT: vmovaps %xmm1, %xmm0
55+
; CHECK-NEXT: vfcmaddcph %xmm1, %xmm2, %xmm0
6056
; CHECK-NEXT: retq
6157
entry:
6258
%0 = bitcast <8 x half> %lhs.coerce.conj to <4 x i32>
@@ -72,8 +68,7 @@ entry:
7268
define dso_local <8 x half> @test5(<8 x half> %acc.coerce, <8 x half> %lhs.coerce.conj, <8 x half> %rhs.coerce) local_unnamed_addr #0 {
7369
; CHECK-LABEL: test5:
7470
; CHECK: # %bb.0: # %entry
75-
; CHECK-NEXT: vfcmaddcph %xmm2, %xmm0, %xmm1
76-
; CHECK-NEXT: vmovaps %xmm1, %xmm0
71+
; CHECK-NEXT: vfcmaddcph %xmm1, %xmm2, %xmm0
7772
; CHECK-NEXT: retq
7873
entry:
7974
%0 = bitcast <8 x half> %lhs.coerce.conj to <4 x i32>
@@ -90,8 +85,7 @@ define dso_local <8 x half> @test6(<8 x half> %acc.coerce, <8 x half> %lhs.coerc
9085
; CHECK-LABEL: test6:
9186
; CHECK: # %bb.0: # %entry
9287
; CHECK-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
93-
; CHECK-NEXT: vfmaddcph %xmm1, %xmm0, %xmm2
94-
; CHECK-NEXT: vmovaps %xmm2, %xmm0
88+
; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0
9589
; CHECK-NEXT: retq
9690
entry:
9791
%0 = bitcast <8 x half> %lhs.coerce.conj to <4 x i32>
@@ -107,8 +101,7 @@ entry:
107101
define dso_local <8 x half> @test7(<8 x half> %acc.coerce, <8 x half> %lhs.coerce.conj, <8 x half> %rhs.coerce) local_unnamed_addr #0 {
108102
; CHECK-LABEL: test7:
109103
; CHECK: # %bb.0: # %entry
110-
; CHECK-NEXT: vfcmaddcph %xmm2, %xmm0, %xmm1
111-
; CHECK-NEXT: vmovaps %xmm1, %xmm0
104+
; CHECK-NEXT: vfcmaddcph %xmm1, %xmm2, %xmm0
112105
; CHECK-NEXT: retq
113106
entry:
114107
%0 = bitcast <8 x half> %lhs.coerce.conj to <4 x i32>
@@ -124,8 +117,7 @@ entry:
124117
define dso_local <8 x half> @test8(<8 x half> %acc.coerce, <4 x float> %lhs.coerce.conj, <8 x half> %rhs.coerce) local_unnamed_addr #0 {
125118
; CHECK-LABEL: test8:
126119
; CHECK: # %bb.0: # %entry
127-
; CHECK-NEXT: vfcmaddcph %xmm2, %xmm0, %xmm1
128-
; CHECK-NEXT: vmovaps %xmm1, %xmm0
120+
; CHECK-NEXT: vfcmaddcph %xmm1, %xmm2, %xmm0
129121
; CHECK-NEXT: retq
130122
entry:
131123
%0 = bitcast <4 x float> %lhs.coerce.conj to <4 x i32>
@@ -141,8 +133,7 @@ entry:
141133
define dso_local <32 x half> @test9(<32 x half> %acc.coerce, <8 x i64> %lhs.coerce.conj, <32 x half> %rhs.coerce) local_unnamed_addr #0 {
142134
; CHECK-LABEL: test9:
143135
; CHECK: # %bb.0: # %entry
144-
; CHECK-NEXT: vfcmaddcph %zmm2, %zmm0, %zmm1
145-
; CHECK-NEXT: vmovaps %zmm1, %zmm0
136+
; CHECK-NEXT: vfcmaddcph %zmm1, %zmm2, %zmm0
146137
; CHECK-NEXT: retq
147138
entry:
148139
%xor1.i = xor <8 x i64> %lhs.coerce.conj, <i64 -9223372034707292160, i64 -9223372034707292160, i64 -9223372034707292160, i64 -9223372034707292160, i64 -9223372034707292160, i64 -9223372034707292160, i64 -9223372034707292160, i64 -9223372034707292160>

0 commit comments

Comments
 (0)