Skip to content

Commit 4aa7b9c

Browse files
committed
[X86] X86InstComments - add FMA4 comments
These typically match the FMA3 equivalents, although the multiply operands sometimes get flipped due to the FMA3 permute variants.
1 parent 10417ad commit 4aa7b9c

14 files changed

+473
-268
lines changed

llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp

Lines changed: 146 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,40 @@ using namespace llvm;
199199
CASE_AVX512_INS_COMMON(Inst##SD, Z, m_Int) \
200200
CASE_AVX512_INS_COMMON(Inst##SS, Z, m_Int)
201201

202+
// Case labels for one FMA4 opcode family: forwards Inst and suf to
// CASE_AVX_INS_COMMON twice, once with the `4` infix and once with `4Y`.
// NOTE(review): `4`/`4Y` presumably name the 128-bit and 256-bit opcode
// variants - confirm against the CASE_AVX_INS_COMMON definition.
#define CASE_FMA4(Inst, suf) \
203+
CASE_AVX_INS_COMMON(Inst, 4, suf) \
204+
CASE_AVX_INS_COMMON(Inst, 4Y, suf)
205+
206+
// Case labels for the packed FMA4 opcodes with the `rr` suffix: applies
// CASE_FMA4 to both the PD and PS flavours of Inst. printFMAComments treats
// these cases as the all-register form (it sets RegForm and reads operand 1,
// operand 2 and the last operand as registers).
#define CASE_FMA4_PACKED_RR(Inst) \
207+
CASE_FMA4(Inst##PD, rr) \
208+
CASE_FMA4(Inst##PS, rr)
209+
210+
// Case labels for the packed FMA4 opcodes with the `rm` suffix: applies
// CASE_FMA4 to both the PD and PS flavours of Inst. For these cases
// printFMAComments reads only operands 1 and 2 as registers (the two
// multiply operands); the accumulator name is left unset.
#define CASE_FMA4_PACKED_RM(Inst) \
211+
CASE_FMA4(Inst##PD, rm) \
212+
CASE_FMA4(Inst##PS, rm)
213+
214+
// Case labels for the packed FMA4 opcodes with the `mr` suffix: applies
// CASE_FMA4 to both the PD and PS flavours of Inst. For these cases
// printFMAComments reads only operand 1 (first multiply operand) and the
// last operand (accumulator) as registers; the second multiply operand name
// is left unset.
#define CASE_FMA4_PACKED_MR(Inst) \
215+
CASE_FMA4(Inst##PD, mr) \
216+
CASE_FMA4(Inst##PS, mr)
217+
218+
// Case labels for the scalar FMA4 opcodes in all-register form: covers
// Inst##SD4 and Inst##SS4, each with the `rr` and `rr_Int` suffixes. The
// middle argument of CASE_AVX_INS_COMMON is deliberately empty here; unlike
// the packed macros, no `4Y` variant is listed for the scalar opcodes.
#define CASE_FMA4_SCALAR_RR(Inst) \
219+
CASE_AVX_INS_COMMON(Inst##SD4, , rr) \
220+
CASE_AVX_INS_COMMON(Inst##SS4, , rr) \
221+
CASE_AVX_INS_COMMON(Inst##SD4, , rr_Int) \
222+
CASE_AVX_INS_COMMON(Inst##SS4, , rr_Int)
223+
224+
// Case labels for the scalar FMA4 opcodes with the `rm` / `rm_Int` suffixes:
// covers Inst##SD4 and Inst##SS4. The middle argument of CASE_AVX_INS_COMMON
// is deliberately empty. For these cases printFMAComments reads only
// operands 1 and 2 as registers; the accumulator name is left unset.
#define CASE_FMA4_SCALAR_RM(Inst) \
225+
CASE_AVX_INS_COMMON(Inst##SD4, , rm) \
226+
CASE_AVX_INS_COMMON(Inst##SS4, , rm) \
227+
CASE_AVX_INS_COMMON(Inst##SD4, , rm_Int) \
228+
CASE_AVX_INS_COMMON(Inst##SS4, , rm_Int)
229+
230+
// Case labels for the scalar FMA4 opcodes with the `mr` / `mr_Int` suffixes:
// covers Inst##SD4 and Inst##SS4. The middle argument of CASE_AVX_INS_COMMON
// is deliberately empty. For these cases printFMAComments reads only
// operand 1 and the last operand as registers; the second multiply operand
// name is left unset.
#define CASE_FMA4_SCALAR_MR(Inst) \
231+
CASE_AVX_INS_COMMON(Inst##SD4, , mr) \
232+
CASE_AVX_INS_COMMON(Inst##SS4, , mr) \
233+
CASE_AVX_INS_COMMON(Inst##SD4, , mr_Int) \
234+
CASE_AVX_INS_COMMON(Inst##SS4, , mr_Int)
235+
202236
static unsigned getVectorRegSize(unsigned RegNo) {
203237
if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31)
204238
return 512;
@@ -247,24 +281,133 @@ static void printMasking(raw_ostream &OS, const MCInst *MI,
247281
OS << " {z}";
248282
}
249283

250-
static bool printFMA3Comments(const MCInst *MI, raw_ostream &OS) {
284+
static bool printFMAComments(const MCInst *MI, raw_ostream &OS) {
251285
const char *Mul1Name = nullptr, *Mul2Name = nullptr, *AccName = nullptr;
252286
unsigned NumOperands = MI->getNumOperands();
253287
bool RegForm = false;
254288
bool Negate = false;
255289
StringRef AccStr = "+";
256290

257-
// The operands for FMA instructions without rounding fall into two forms.
291+
// The operands for FMA3 instructions without rounding fall into two forms:
258292
// dest, src1, src2, src3
259293
// dest, src1, mask, src2, src3
260294
// Where src3 is either a register or 5 memory address operands. So to find
261295
// dest and src1 we can index from the front. To find src2 and src3 we can
262296
// index from the end by taking into account memory vs register form when
263297
// finding src2.
264298

299+
// The operands for FMA4 instructions:
300+
// dest, src1, src2, src3
301+
// Where at most one of src2 or src3 is 5 memory address operands and the
302+
// others are registers. So to find dest and src1 we can index from the front,
303+
// src2 (reg/mem) follows and then src3 (reg/mem) will be at the end.
304+
265305
switch (MI->getOpcode()) {
266306
default:
267307
return false;
308+
309+
CASE_FMA4_PACKED_RR(FMADD)
310+
CASE_FMA4_SCALAR_RR(FMADD)
311+
RegForm = true;
312+
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
313+
LLVM_FALLTHROUGH;
314+
CASE_FMA4_PACKED_RM(FMADD)
315+
CASE_FMA4_SCALAR_RM(FMADD)
316+
Mul2Name = getRegName(MI->getOperand(2).getReg());
317+
Mul1Name = getRegName(MI->getOperand(1).getReg());
318+
break;
319+
CASE_FMA4_PACKED_MR(FMADD)
320+
CASE_FMA4_SCALAR_MR(FMADD)
321+
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
322+
Mul1Name = getRegName(MI->getOperand(1).getReg());
323+
break;
324+
325+
CASE_FMA4_PACKED_RR(FMSUB)
326+
CASE_FMA4_SCALAR_RR(FMSUB)
327+
RegForm = true;
328+
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
329+
LLVM_FALLTHROUGH;
330+
CASE_FMA4_PACKED_RM(FMSUB)
331+
CASE_FMA4_SCALAR_RM(FMSUB)
332+
Mul2Name = getRegName(MI->getOperand(2).getReg());
333+
Mul1Name = getRegName(MI->getOperand(1).getReg());
334+
AccStr = "-";
335+
break;
336+
CASE_FMA4_PACKED_MR(FMSUB)
337+
CASE_FMA4_SCALAR_MR(FMSUB)
338+
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
339+
Mul1Name = getRegName(MI->getOperand(1).getReg());
340+
AccStr = "-";
341+
break;
342+
343+
CASE_FMA4_PACKED_RR(FNMADD)
344+
CASE_FMA4_SCALAR_RR(FNMADD)
345+
RegForm = true;
346+
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
347+
LLVM_FALLTHROUGH;
348+
CASE_FMA4_PACKED_RM(FNMADD)
349+
CASE_FMA4_SCALAR_RM(FNMADD)
350+
Mul2Name = getRegName(MI->getOperand(2).getReg());
351+
Mul1Name = getRegName(MI->getOperand(1).getReg());
352+
Negate = true;
353+
break;
354+
CASE_FMA4_PACKED_MR(FNMADD)
355+
CASE_FMA4_SCALAR_MR(FNMADD)
356+
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
357+
Mul1Name = getRegName(MI->getOperand(1).getReg());
358+
Negate = true;
359+
break;
360+
361+
CASE_FMA4_PACKED_RR(FNMSUB)
362+
CASE_FMA4_SCALAR_RR(FNMSUB)
363+
RegForm = true;
364+
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
365+
LLVM_FALLTHROUGH;
366+
CASE_FMA4_PACKED_RM(FNMSUB)
367+
CASE_FMA4_SCALAR_RM(FNMSUB)
368+
Mul2Name = getRegName(MI->getOperand(2).getReg());
369+
Mul1Name = getRegName(MI->getOperand(1).getReg());
370+
AccStr = "-";
371+
Negate = true;
372+
break;
373+
CASE_FMA4_PACKED_MR(FNMSUB)
374+
CASE_FMA4_SCALAR_MR(FNMSUB)
375+
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
376+
Mul1Name = getRegName(MI->getOperand(1).getReg());
377+
AccStr = "-";
378+
Negate = true;
379+
break;
380+
381+
CASE_FMA4_PACKED_RR(FMADDSUB)
382+
RegForm = true;
383+
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
384+
LLVM_FALLTHROUGH;
385+
CASE_FMA4_PACKED_RM(FMADDSUB)
386+
Mul2Name = getRegName(MI->getOperand(2).getReg());
387+
Mul1Name = getRegName(MI->getOperand(1).getReg());
388+
AccStr = "+/-";
389+
break;
390+
CASE_FMA4_PACKED_MR(FMADDSUB)
391+
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
392+
Mul1Name = getRegName(MI->getOperand(1).getReg());
393+
AccStr = "+/-";
394+
break;
395+
396+
CASE_FMA4_PACKED_RR(FMSUBADD)
397+
RegForm = true;
398+
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
399+
LLVM_FALLTHROUGH;
400+
CASE_FMA4_PACKED_RM(FMSUBADD)
401+
Mul2Name = getRegName(MI->getOperand(2).getReg());
402+
Mul1Name = getRegName(MI->getOperand(1).getReg());
403+
AccStr = "-/+";
404+
break;
405+
CASE_FMA4_PACKED_MR(FMSUBADD)
406+
AccName = getRegName(MI->getOperand(NumOperands - 1).getReg());
407+
Mul1Name = getRegName(MI->getOperand(1).getReg());
408+
AccStr = "-/+";
409+
break;
410+
268411
CASE_FMA_PACKED_REG(FMADD132)
269412
CASE_FMA_SCALAR_REG(FMADD132)
270413
Mul2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
@@ -504,7 +647,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
504647
unsigned NumOperands = MI->getNumOperands();
505648
bool RegForm = false;
506649

507-
if (printFMA3Comments(MI, OS))
650+
if (printFMAComments(MI, OS))
508651
return true;
509652

510653
switch (MI->getOpcode()) {

llvm/test/CodeGen/X86/extended-fma-contraction.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
define <3 x float> @fmafunc(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
66
; CHECK-LABEL: fmafunc:
77
; CHECK: ## %bb.0:
8-
; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
8+
; CHECK-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
99
; CHECK-NEXT: retl
1010
;
1111
; CHECK-NOFMA-LABEL: fmafunc:

llvm/test/CodeGen/X86/fma-fneg-combine-2.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ define float @test_fneg_fma_subx_y_negz_f32(float %w, float %x, float %y, float
1212
; FMA4-LABEL: test_fneg_fma_subx_y_negz_f32:
1313
; FMA4: # %bb.0: # %entry
1414
; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm0
15-
; FMA4-NEXT: vfnmaddss %xmm3, %xmm2, %xmm0, %xmm0
15+
; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm3
1616
; FMA4-NEXT: retq
1717
entry:
1818
%subx = fsub nsz float %w, %x
@@ -32,7 +32,7 @@ define float @test_fneg_fma_x_suby_negz_f32(float %w, float %x, float %y, float
3232
; FMA4-LABEL: test_fneg_fma_x_suby_negz_f32:
3333
; FMA4: # %bb.0: # %entry
3434
; FMA4-NEXT: vsubss %xmm2, %xmm0, %xmm0
35-
; FMA4-NEXT: vfnmaddss %xmm3, %xmm0, %xmm1, %xmm0
35+
; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
3636
; FMA4-NEXT: retq
3737
entry:
3838
%suby = fsub nsz float %w, %y
@@ -54,7 +54,7 @@ define float @test_fneg_fma_subx_suby_negz_f32(float %w, float %x, float %y, flo
5454
; FMA4: # %bb.0: # %entry
5555
; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm1
5656
; FMA4-NEXT: vsubss %xmm2, %xmm0, %xmm0
57-
; FMA4-NEXT: vfnmaddss %xmm3, %xmm0, %xmm1, %xmm0
57+
; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm3
5858
; FMA4-NEXT: retq
5959
entry:
6060
%subx = fsub nsz float %w, %x
@@ -75,7 +75,7 @@ define float @test_fneg_fma_subx_negy_negz_f32(float %w, float %x, float %y, flo
7575
; FMA4-LABEL: test_fneg_fma_subx_negy_negz_f32:
7676
; FMA4: # %bb.0: # %entry
7777
; FMA4-NEXT: vsubss %xmm1, %xmm0, %xmm0
78-
; FMA4-NEXT: vfmaddss %xmm3, %xmm2, %xmm0, %xmm0
78+
; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm3
7979
; FMA4-NEXT: retq
8080
entry:
8181
%subx = fsub nsz float %w, %x
@@ -96,7 +96,7 @@ define <4 x float> @test_fma_rcp_fneg_v4f32(<4 x float> %x, <4 x float> %y, <4 x
9696
; FMA4-LABEL: test_fma_rcp_fneg_v4f32:
9797
; FMA4: # %bb.0: # %entry
9898
; FMA4-NEXT: vrcpps %xmm2, %xmm2
99-
; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
99+
; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
100100
; FMA4-NEXT: retq
101101
entry:
102102
%0 = fneg <4 x float> %z
@@ -118,7 +118,7 @@ define float @negated_constant(float %x) {
118118
; FMA4-LABEL: negated_constant:
119119
; FMA4: # %bb.0:
120120
; FMA4-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
121-
; FMA4-NEXT: vfnmsubss %xmm1, {{.*}}(%rip), %xmm0, %xmm0
121+
; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1
122122
; FMA4-NEXT: retq
123123
%m = fmul float %x, 42.0
124124
%fma = call nsz float @llvm.fma.f32(float %x, float -42.0, float %m)

0 commit comments

Comments
 (0)