Skip to content

Commit 82a1d50

Browse files
authored
[PowerPC] Add dense math half-precision floating-point outer-product accumulate to DMR instructions (#133272)
This patch adds the following Dense Math Facility 16-bit half-precision floating-point calculation instructions: dmxvf16gerx2, dmxvf16gerx2pp, dmxvf16gerx2pn, dmxvf16gerx2np, dmxvf16gerx2nn, pmdmxvf16gerx2, pmdmxvf16gerx2pp, pmdmxvf16gerx2pn, pmdmxvf16gerx2np, pmdmxvf16gerx2nn, along with their corresponding intrinsics and tests.
1 parent 31bd7a5 commit 82a1d50

File tree

6 files changed

+714
-2
lines changed

6 files changed

+714
-2
lines changed

llvm/include/llvm/IR/IntrinsicsPowerPC.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1771,6 +1771,13 @@ let TargetPrefix = "ppc" in {
17711771
defm int_ppc_mma_pmdmxvbf16gerx2 :
17721772
PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
17731773
llvm_i32_ty, llvm_i32_ty]>;
1774+
1775+
// MMA+ Half-precision Outer Product Intrinsic Definitions.
1776+
// Unprefixed half-precision form: instantiated with the type list
// [v256i1, v16i8].
// NOTE(review): PowerPC_MMA_DMR_Intrinsic is defined outside this excerpt;
// presumably it also emits the pp/pn/np/nn accumulating variants that the
// selection patterns reference -- confirm against its definition.
defm int_ppc_mma_dmxvf16gerx2 :
1777+
PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>;
1778+
// Prefixed (pm) half-precision form: same leading two types plus three
// trailing i32 entries (the selection patterns bind them as XMSK, YMSK and
// PMSK).
defm int_ppc_mma_pmdmxvf16gerx2 :
1779+
PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
1780+
llvm_i32_ty, llvm_i32_ty]>;
17741781
}
17751782

17761783
// XL Compat intrinsics.

llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td

Lines changed: 114 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ multiclass DMR_BF16_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
207207
}
208208
}
209209

210-
multiclass DMR_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
210+
multiclass DMR_BF16_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
211211
string asmstr> {
212212
defm NAME : DMR_BF16_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
213213
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
@@ -228,9 +228,30 @@ multiclass DMR_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
228228
}
229229
}
230230

231+
// F16 multiclass producing the base records plus the two prefixed ("pm")
// records PM#NAME and PM#NAME#PP.
//   opcode  - primary opcode forwarded unchanged to every record.
//   xo      - extended opcode; used as-is for PM#NAME#PP and OR'ed with 0x01
//             for PM#NAME.
//   IOL     - input operand dag; the mask immediates are appended to it.
//   asmbase - mnemonic stem; "pm" is prepended for the prefixed records.
//   asmstr  - operand string; ", $XMSK, $YMSK, $PMSK" is appended.
// NOTE(review): DMR_UM_XOEO is defined outside this excerpt -- confirm which
// base records it creates.
multiclass DMR_F16_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
232+
string asmstr> {
233+
defm NAME : DMR_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
234+
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
235+
// Non-accumulating prefixed form: no $ATi input, $AT is marked
// earlyclobber, and the extended opcode is xo | 0x01.
def PM#NAME :
236+
MMIRR_XX3Form_X8Y4P2_XAp5B6<
237+
opcode, !or(xo, 0x01), (outs dmr:$AT),
238+
!con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
239+
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
240+
IIC_VecFP, []>,
241+
RegConstraint<"@earlyclobber $AT">;
242+
// Accumulating "pp" prefixed form: takes $ATi as an extra input that is
// tied to $AT and excluded from the encoding; extended opcode is xo as given.
def PM#NAME#PP :
243+
MMIRR_XX3Form_X8Y4P2_XAp5B6<
244+
opcode, xo, (outs dmr:$AT),
245+
!con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
246+
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
247+
IIC_VecFP, []>,
248+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
249+
}
250+
}
251+
231252
multiclass DMR_NEG_UM_M284_XOXORf939a0<bits<6> opcode, bits<8> xo, dag IOL,
232253
string asmbase, string asmstr> {
233-
defm NAME : DMR_UM_M284_XOEO<opcode, xo, IOL, asmbase, asmstr>;
254+
defm NAME : DMR_BF16_UM_M284_XOEO<opcode, xo, IOL, asmbase, asmstr>;
234255
let Predicates = [MMA, IsISAFuture] in {
235256
def PN : XX3Form_AT3_XAp5B6<
236257
opcode, !xor(xo, 0xF9), (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
@@ -270,6 +291,48 @@ multiclass DMR_NEG_UM_M284_XOXORf939a0<bits<6> opcode, bits<8> xo, dag IOL,
270291
}
271292
}
272293

294+
// F16 multiclass that layers the pn/np/nn records on top of
// DMR_F16_UM_M284_XOEO. The extended opcode of each added record is derived
// by XOR-ing xo with a constant:
//   pn -> xo ^ 0xD1,  np -> xo ^ 0x11,  nn -> xo ^ 0x88
// (the three constants spelled in the multiclass name suffix "d11188").
// Every record added here takes $ATi as an input tied to $AT and excluded
// from the encoding.
//   opcode  - primary opcode forwarded unchanged to every record.
//   xo      - base extended opcode the XOR constants are applied to.
//   IOL     - input operand dag; $ATi is prepended, and for prefixed records
//             the mask immediates are appended.
//   asmbase - mnemonic stem; the pn/np/nn suffix (and "pm" prefix) is added.
//   asmstr  - operand string; prefixed records append the three mask operands.
multiclass DMR_NEG_UM_M284_XOXORd11188<bits<6> opcode, bits<8> xo, dag IOL,
295+
string asmbase, string asmstr> {
296+
defm NAME : DMR_F16_UM_M284_XOEO<opcode, xo, IOL, asmbase, asmstr>;
297+
// Unprefixed pn/np/nn records (XX3Form_AT3_XAp5B6).
let Predicates = [MMA, IsISAFuture] in {
298+
def PN : XX3Form_AT3_XAp5B6<
299+
opcode, !xor(xo, 0xD1), (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
300+
!strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
301+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
302+
def NP : XX3Form_AT3_XAp5B6<
303+
opcode, !xor(xo, 0x11), (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
304+
!strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
305+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
306+
def NN : XX3Form_AT3_XAp5B6<
307+
opcode, !xor(xo, 0x88), (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
308+
!strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
309+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
310+
}
311+
// Prefixed pm...pn/np/nn records: same XOR constants as the unprefixed
// records, with the XMSK/YMSK/PMSK immediates appended to the inputs.
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
312+
def PM#NAME#PN :
313+
MMIRR_XX3Form_X8Y4P2_XAp5B6<
314+
opcode, !xor(xo, 0xD1), (outs dmr:$AT),
315+
!con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
316+
!strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"),
317+
IIC_VecFP, []>,
318+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
319+
def PM#NAME#NP :
320+
MMIRR_XX3Form_X8Y4P2_XAp5B6<
321+
opcode, !xor(xo, 0x11), (outs dmr:$AT),
322+
!con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
323+
!strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"),
324+
IIC_VecFP, []>,
325+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
326+
def PM#NAME#NN :
327+
MMIRR_XX3Form_X8Y4P2_XAp5B6<
328+
opcode, !xor(xo, 0x88), (outs dmr:$AT),
329+
!con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
330+
!strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"),
331+
IIC_VecFP, []>,
332+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
333+
}
334+
}
335+
273336
let Predicates = [IsISAFuture] in {
274337
def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226,
275338
(outs vsrprc:$XAp, vsrprc:$XBp),
@@ -347,6 +410,11 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
347410
defm DMXVBF16GERX2 : DMR_NEG_UM_M284_XOXORf939a0<59, 74, (ins vsrprc:$XAp, vsrc:$XB),
348411
"dmxvbf16gerx2", "$AT, $XAp, $XB">;
349412

413+
// DMXVF16GERX2, DMXVF16GERX2PP, DMXVF16GERX2PN, DMXVF16GERX2NP, DMXVF16GERX2NN
414+
// PMDMXVF16GERX2, PMDMXVF16GERX2PP, PMDMXVF16GERX2PN, PMDMXVF16GERX2NP, PMDMXVF16GERX2NN
415+
// Instantiates the F16 multiclass with primary opcode 59 and extended
// opcode 66; inputs are $XAp (vsrprc) and $XB (vsrc), mnemonic stem
// "dmxvf16gerx2".
defm DMXVF16GERX2 : DMR_NEG_UM_M284_XOXORd11188<59, 66, (ins vsrprc:$XAp, vsrc:$XB),
416+
"dmxvf16gerx2", "$AT, $XAp, $XB">;
417+
350418
// MMA+ Intrinsics
351419
let Predicates = [MMA, IsISAFuture] in {
352420
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4 v256i1:$XAp, v16i8:$XB)),
@@ -371,6 +439,21 @@ let Predicates = [MMA, IsISAFuture] in {
371439

372440
def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
373441
(DMXVBF16GERX2NN $ATi, $XAp, RCCp.BToVSRC)>;
442+
443+
// Selection patterns mapping each unprefixed F16 intrinsic to the
// same-named instruction record. The base form takes only $XAp and $XB; the
// pp/pn/np/nn forms additionally pass the v1024i1 accumulator $ATi. In every
// output dag the $XB slot is filled with RCCp.BToVSRC.
// NOTE(review): RCCp is defined outside this excerpt -- confirm what
// BToVSRC expands to.
def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2 v256i1:$XAp, v16i8:$XB)),
444+
(DMXVF16GERX2 $XAp, RCCp.BToVSRC)>;
445+
446+
def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
447+
(DMXVF16GERX2PP $ATi, $XAp, RCCp.BToVSRC)>;
448+
449+
def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2pn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
450+
(DMXVF16GERX2PN $ATi, $XAp, RCCp.BToVSRC)>;
451+
452+
def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2np v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
453+
(DMXVF16GERX2NP $ATi, $XAp, RCCp.BToVSRC)>;
454+
455+
def : Pat<(v1024i1 (int_ppc_mma_dmxvf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
456+
(DMXVF16GERX2NN $ATi, $XAp, RCCp.BToVSRC)>;
374457
}
375458

376459
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
@@ -419,4 +502,33 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
419502
Msk2Imm:$PMSK)),
420503
(PMDMXVBF16GERX2NN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
421504
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
505+
506+
// Selection patterns mapping each prefixed (pm) F16 intrinsic to the
// same-named instruction record. Each pattern forwards the three mask
// immediates (Msk8Imm:$XMSK, Msk4Imm:$YMSK, Msk2Imm:$PMSK) unchanged; the
// pp/pn/np/nn forms additionally pass the v1024i1 accumulator $ATi. In every
// output dag the $XB slot is filled with RCCp.BToVSRC.
// NOTE(review): RCCp is defined outside this excerpt -- confirm what
// BToVSRC expands to.
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2 v256i1:$XAp, v16i8:$XB, Msk8Imm:$XMSK,
507+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
508+
(PMDMXVF16GERX2 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
509+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
510+
511+
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
512+
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
513+
Msk2Imm:$PMSK)),
514+
(PMDMXVF16GERX2PP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
515+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
516+
517+
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2pn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
518+
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
519+
Msk2Imm:$PMSK)),
520+
(PMDMXVF16GERX2PN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
521+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
522+
523+
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2np v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
524+
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
525+
Msk2Imm:$PMSK)),
526+
(PMDMXVF16GERX2NP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
527+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
528+
529+
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
530+
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
531+
Msk2Imm:$PMSK)),
532+
(PMDMXVF16GERX2NN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
533+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
422534
}

0 commit comments

Comments
 (0)