Skip to content

Commit 22c6674

Browse files
authored
[PowerPC] Add Dense Math binary integer outer-Product accumulate to DMR Instructions (#130791)
This commit adds the following Dense Math Facility integer calculation instructions: dmxvi8gerx4, dmxvi8gerx4pp, dmxvi8gerx4spp, pmdmxvi8gerx4, pmdmxvi8gerx4pp, and pmdmxvi8gerx4spp, along with their corresponding intrinsics and tests.
1 parent a2d7451 commit 22c6674

File tree

8 files changed

+524
-8
lines changed

8 files changed

+524
-8
lines changed

llvm/include/llvm/IR/IntrinsicsPowerPC.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,13 @@ multiclass PowerPC_MMA_ACC_PP_Intrinsic<list<LLVMType> args> {
280280
[IntrNoMem]>;
281281
}
282282

283+
// Defines a pair of intrinsics that both return a v1024i1 value and are
// marked IntrNoMem:
//   NAME     - takes exactly the operand types listed in `args`.
//   NAME#pp  - takes a v1024i1 value as its first operand, followed by `args`.
multiclass PowerPC_MMA_DMR_PP_Intrinsic<list<LLVMType> args> {
284+
def NAME: DefaultAttrsIntrinsic<[llvm_v1024i1_ty], args, [IntrNoMem]>;
285+
def pp : DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
286+
!listconcat([llvm_v1024i1_ty], args), // leading v1024i1 operand, then args
287+
[IntrNoMem]>;
288+
}
289+
283290
//===----------------------------------------------------------------------===//
284291
// PowerPC Altivec Intrinsic Class Definitions.
285292
//
@@ -1711,6 +1718,20 @@ let TargetPrefix = "ppc" in {
17111718
[llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty,
17121719
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
17131720
[IntrNoMem]>;
1721+
// Dense Math integer outer-product intrinsics. Each returns a v1024i1 value.
// The defm below expands to int_ppc_mma_dmxvi8gerx4 (operands: v256i1, v16i8)
// and int_ppc_mma_dmxvi8gerx4pp (operands: v1024i1, v256i1, v16i8).
defm int_ppc_mma_dmxvi8gerx4 :
1722+
PowerPC_MMA_DMR_PP_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>;
1723+
// Expands to int_ppc_mma_pmdmxvi8gerx4 and int_ppc_mma_pmdmxvi8gerx4pp; these
// take three additional trailing i32 operands.
defm int_ppc_mma_pmdmxvi8gerx4 :
1724+
PowerPC_MMA_DMR_PP_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
1725+
llvm_i32_ty, llvm_i32_ty]>;
1726+
// The spp intrinsics are defined individually: only a form that takes a
// leading v1024i1 operand is declared for them.
def int_ppc_mma_dmxvi8gerx4spp :
1727+
DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
1728+
[llvm_v1024i1_ty, llvm_v256i1_ty, llvm_v16i8_ty],
1729+
[IntrNoMem]>;
1730+
// Same operands as int_ppc_mma_dmxvi8gerx4spp plus three trailing i32 operands.
def int_ppc_mma_pmdmxvi8gerx4spp :
1731+
DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
1732+
[llvm_v1024i1_ty, llvm_v256i1_ty, llvm_v16i8_ty,
1733+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1734+
[IntrNoMem]>;
17141735
}
17151736

17161737
// XL Compat intrinsics.

llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,96 @@ class XForm_ATB3<bits<6> opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL,
6969
let Inst{31} = 0;
7070
}
7171

72+
// Instruction format with a 3-bit AT field, a 5-bit XAp field and a 6-bit XB
// field. XAp and XB are each split across two places in the encoding.
// Bits 0-5 are not assigned in this class (presumably the primary opcode is
// placed there by the base class `I` -- confirm).
class XX3Form_AT3_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
73+
string asmstr, InstrItinClass itin,
74+
list<dag> pattern>
75+
: I<opcode, OOL, IOL, asmstr, itin> {
76+
bits<3> AT;
77+
bits<5> XAp;
78+
bits<6> XB;
79+
80+
let Pattern = pattern;
81+
82+
let Inst{6-8} = AT;
83+
let Inst{9-10} = 0; // fixed zero bits
84+
let Inst{11-14} = XAp{3-0}; // low four bits of XAp
85+
let Inst{15} = 0; // fixed zero bit
86+
let Inst{16-20} = XB{4-0}; // low five bits of XB
87+
let Inst{21-28} = xo; // 8-bit extended opcode
88+
let Inst{29} = XAp{4}; // high bit of XAp
89+
let Inst{30} = XB{5}; // high bit of XB
90+
let Inst{31} = 0; // fixed zero bit
91+
}
92+
93+
// Prefixed (64-bit) counterpart of XX3Form_AT3_XAp5B6. Inst{6-31} hold fixed
// prefix fields plus the PMSK/XMSK/YMSK masks; Inst{38-63} repeat the
// AT/XAp/XB/xo layout of XX3Form_AT3_XAp5B6 shifted up by 32 bits.
// Bits 0-5 and 32-37 are not assigned in this class (presumably filled in by
// the base class `PI` -- confirm).
class MMIRR_XX3Form_X8YP4_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
94+
string asmstr, InstrItinClass itin,
95+
list<dag> pattern>
96+
: PI<1, opcode, OOL, IOL, asmstr, itin> {
97+
bits<3> AT;
98+
// Five bits wide: only XAp{4-0} are encoded below, matching the `XAp5` in the
// class name and the declaration in XX3Form_AT3_XAp5B6.
bits<5> XAp;
99+
bits<6> XB;
100+
bits<8> XMSK;
101+
bits<4> YMSK;
102+
bits<4> PMSK;
103+
104+
let Pattern = pattern;
105+
106+
// The prefix.
107+
let Inst{6-7} = 3; // fixed prefix field
108+
let Inst{8-11} = 9; // fixed prefix field
109+
let Inst{12-15} = 0; // fixed zero bits
110+
let Inst{16-19} = PMSK; // 4-bit mask
111+
let Inst{20-27} = XMSK; // 8-bit mask
112+
let Inst{28-31} = YMSK; // 4-bit mask
113+
114+
// The instruction.
115+
let Inst{38-40} = AT;
116+
let Inst{41-42} = 0; // fixed zero bits
117+
let Inst{43-46} = XAp{3-0}; // low four bits of XAp
118+
let Inst{47} = 0; // fixed zero bit
119+
let Inst{48-52} = XB{4-0}; // low five bits of XB
120+
let Inst{53-60} = xo; // 8-bit extended opcode
121+
let Inst{61} = XAp{4}; // high bit of XAp
122+
let Inst{62} = XB{5}; // high bit of XB
123+
let Inst{63} = 0; // fixed zero bit
124+
}
125+
126+
// Defines two XX3Form_AT3_XAp5B6 instructions that write a dmr register $AT,
// both predicated on [MMA, IsISAFuture]:
//   NAME     - inputs are IOL only; encoded with extended opcode (xo | 0x01);
//              $AT is marked early-clobber.
//   NAME#PP  - takes an extra leading dmr input $ATi that is tied to $AT and
//              excluded from the encoding; encoded with extended opcode xo.
// The assembly string is asmbase (with a "pp" suffix for the PP form),
// followed by a space and asmstr. Neither instruction carries a selection
// pattern here.
multiclass DMR_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
127+
string asmstr> {
128+
let Predicates = [MMA, IsISAFuture] in {
129+
def NAME :
130+
XX3Form_AT3_XAp5B6<opcode, !or(xo, 0x01), (outs dmr:$AT), IOL,
131+
!strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
132+
RegConstraint<"@earlyclobber $AT">;
133+
def PP :
134+
XX3Form_AT3_XAp5B6<opcode, xo, (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
135+
!strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
136+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
137+
}
138+
}
139+
140+
// Instantiates DMR_UM_XOEO (giving NAME and NAME#PP) and adds two prefixed
// MMIRR_XX3Form_X8YP4_XAp5B6 instructions predicated on
// [MMA, PrefixInstrs, IsISAFuture]:
//   PM#NAME     - inputs are IOL plus the XMSK (u8imm), YMSK (u4imm) and PMSK
//                 (u4imm) mask immediates; extended opcode (xo | 0x01);
//                 $AT is marked early-clobber.
//   PM#NAME#PP  - as above with an extra leading dmr input $ATi that is tied
//                 to $AT and excluded from the encoding; extended opcode xo.
// The assembly mnemonic is "pm" + asmbase (+ "pp"), and the three mask
// operands are appended to asmstr.
multiclass DMR_UM_M448_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
141+
string asmstr> {
142+
defm NAME : DMR_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
143+
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
144+
def PM#NAME :
145+
MMIRR_XX3Form_X8YP4_XAp5B6<
146+
opcode, !or(xo, 0x01), (outs dmr:$AT),
147+
!con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
148+
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
149+
IIC_VecFP, []>,
150+
RegConstraint<"@earlyclobber $AT">;
151+
def PM#NAME#PP :
152+
MMIRR_XX3Form_X8YP4_XAp5B6<
153+
opcode, xo, (outs dmr:$AT),
154+
!con((ins dmr:$ATi),
155+
!con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
156+
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
157+
IIC_VecFP, []>,
158+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
159+
}
160+
}
161+
72162
let Predicates = [IsISAFuture] in {
73163
def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226,
74164
(outs vsrprc:$XAp, vsrprc:$XBp),
@@ -117,3 +207,56 @@ let Predicates = [IsISAFuture] in {
117207
"dmsetdmrz $AT", NoItinerary,
118208
[(set v1024i1:$AT, (int_ppc_mma_dmsetdmrz))]>;
119209
}
210+
211+
// MMA+ accumulating/non-accumulating instructions.
212+
213+
// DMXVI8GERX4, DMXVI8GERX4PP, PMDMXVI8GERX4, PMDMXVI8GERX4PP
// All four use primary opcode 59. Per DMR_UM_M448_XOEO, the PP forms are
// encoded with extended opcode 10 and the non-PP forms with (10 | 0x01) = 11.
214+
defm DMXVI8GERX4 : DMR_UM_M448_XOEO<59, 10, (ins vsrprc:$XAp, vsrc:$XB),
215+
"dmxvi8gerx4", "$AT, $XAp, $XB">;
216+
217+
// DMXVI8GERX4SPP: primary opcode 59, extended opcode 98. Takes a dmr input
// $ATi that is tied to the output $AT and excluded from the encoding. No
// selection pattern is attached to the instruction itself.
let Predicates = [MMA, IsISAFuture] in {
218+
def DMXVI8GERX4SPP :
219+
XX3Form_AT3_XAp5B6<59, 98, (outs dmr:$AT), (ins dmr:$ATi, vsrprc:$XAp, vsrc:$XB),
220+
"dmxvi8gerx4spp $AT, $XAp, $XB", IIC_VecGeneral, []>,
221+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
222+
}
223+
224+
// PMDMXVI8GERX4SPP: prefixed form using primary opcode 59 and extended
// opcode 98. In addition to $ATi (tied to $AT, not encoded), $XAp and $XB, it
// takes the XMSK (u8imm), YMSK (u4imm) and PMSK (u4imm) mask immediates.
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
225+
def PMDMXVI8GERX4SPP :
226+
MMIRR_XX3Form_X8YP4_XAp5B6<59, 98, (outs dmr:$AT),
227+
(ins dmr:$ATi, vsrprc:$XAp,vsrc:$XB, u8imm:$XMSK,
228+
u4imm:$YMSK, u4imm:$PMSK),
229+
"pmdmxvi8gerx4spp $AT, $XAp, $XB, $XMSK, $YMSK, $PMSK",
230+
IIC_VecGeneral, []>,
231+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
232+
}
233+
234+
// MMA+ Intrinsics
235+
// Selection patterns mapping the non-prefixed dmxvi8gerx4 intrinsics
// (plain, pp and spp) onto the instructions of the same name.
// NOTE(review): the v16i8 $XB operand is passed as RCCp.BToVSRC, which is
// defined outside this excerpt -- presumably it re-classes $XB into VSRC;
// confirm against its definition.
let Predicates = [MMA, IsISAFuture] in {
236+
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4 v256i1:$XAp, v16i8:$XB)),
237+
(DMXVI8GERX4 $XAp, RCCp.BToVSRC)>;
238+
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
239+
(DMXVI8GERX4PP $ATi, $XAp, RCCp.BToVSRC)>;
240+
241+
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
242+
(DMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC)>;
243+
}
244+
245+
// Selection patterns mapping the prefixed pmdmxvi8gerx4 intrinsics (plain, pp
// and spp) onto the instructions of the same name. The three mask operands
// only match immediates accepted by Msk8Imm (XMSK) and Msk4Imm (YMSK, PMSK),
// and are forwarded unchanged to the instruction.
// NOTE(review): RCCp.BToVSRC is defined outside this excerpt -- presumably it
// re-classes $XB into VSRC; confirm against its definition.
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
246+
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4 v256i1:$XAp, v16i8:$XB, Msk8Imm:$XMSK,
247+
Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
248+
(PMDMXVI8GERX4 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
249+
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
250+
251+
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
252+
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
253+
Msk4Imm:$PMSK)),
254+
(PMDMXVI8GERX4PP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
255+
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
256+
257+
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
258+
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
259+
Msk4Imm:$PMSK)),
260+
(PMDMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
261+
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
262+
}

llvm/lib/Target/PowerPC/PPCInstrInfo.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3474,6 +3474,13 @@ class PPCAsmPseudo<string asm, dag iops>
34743474
let TSFlags{10} = MemriOp;
34753475
}
34763476

3477+
// Mask immediates for MMA instructions (2, 4 and 8 bits).
// Each leaf matches an i32 immediate only when isUInt<N>(Imm) holds for the
// stated bit width N.
3478+
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
3479+
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
3480+
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
3481+
3482+
// Predicate that holds when Subtarget->hasMMA() is true.
def MMA : Predicate<"Subtarget->hasMMA()">;
3483+
34773484
// Prefixed instructions may require access to the above defs at a later
34783485
// time so we include this after the def.
34793486
include "PPCInstrP10.td"

llvm/lib/Target/PowerPC/PPCInstrMMA.td

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,4 @@
11

2-
// Mask immediates for MMA instructions (2, 4 and 8 bits).
3-
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
4-
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
5-
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
6-
7-
def MMA : Predicate<"Subtarget->hasMMA()">;
8-
9-
102
// Multiclass definitions for MMA accumulator instructions.
113
// ----------------------------------------------------------------------------
124

0 commit comments

Comments
 (0)