Skip to content

Commit 42a3b4d

Browse files
committed
[PowerPC] Add Dense Math binary integer outer-Product accumulate to DMR Instructions
This commit adds the following Dense Math Facility integer calculation instructions: dmxvi8gerx4, dmxvi8gerx4pp, dmxvi8gerx4spp, pmdmxvi8gerx4, pmdmxvi8gerx4pp, and pmdmxvi8gerx4spp, along with their corresponding intrinsics and tests.
1 parent 1b31646 commit 42a3b4d

File tree

7 files changed

+523
-8
lines changed

7 files changed

+523
-8
lines changed

llvm/include/llvm/IR/IntrinsicsPowerPC.td

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,13 @@ multiclass PowerPC_MMA_ACC_PP_Intrinsic<list<LLVMType> args> {
280280
[IntrNoMem]>;
281281
}
282282

283+
// Defines a pair of intrinsics that both return a v1024i1 value:
//   NAME   - takes exactly the operands listed in `args`.
//   NAMEpp - takes a leading v1024i1 operand followed by `args`.
// Both are marked IntrNoMem.
multiclass PowerPC_MMA_DMR_PP_Intrinsic<list<LLVMType> args> {
284+
def NAME: DefaultAttrsIntrinsic<[llvm_v1024i1_ty], args, [IntrNoMem]>;
285+
def pp : DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
286+
!listconcat([llvm_v1024i1_ty], args),
287+
[IntrNoMem]>;
288+
}
289+
283290
//===----------------------------------------------------------------------===//
284291
// PowerPC Altivec Intrinsic Class Definitions.
285292
//
@@ -1711,6 +1718,20 @@ let TargetPrefix = "ppc" in {
17111718
[llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty,
17121719
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
17131720
[IntrNoMem]>;
1721+
// dmxvi8gerx4 intrinsics. Each returns v1024i1 and takes a v256i1 and a v16i8
// operand; the "pm" forms take three additional i32 operands.
// The defm lines expand to the base intrinsic and its "pp" variant, which has
// an extra leading v1024i1 operand.
defm int_ppc_mma_dmxvi8gerx4 :
1722+
PowerPC_MMA_DMR_PP_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>;
1723+
defm int_ppc_mma_pmdmxvi8gerx4 :
1724+
PowerPC_MMA_DMR_PP_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
1725+
llvm_i32_ty, llvm_i32_ty]>;
1726+
// The "spp" variants are defined directly; like "pp" they take a leading
// v1024i1 operand.
def int_ppc_mma_dmxvi8gerx4spp :
1727+
DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
1728+
[llvm_v1024i1_ty, llvm_v256i1_ty, llvm_v16i8_ty],
1729+
[IntrNoMem]>;
1730+
def int_ppc_mma_pmdmxvi8gerx4spp :
1731+
DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
1732+
[llvm_v1024i1_ty, llvm_v256i1_ty, llvm_v16i8_ty,
1733+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1734+
[IntrNoMem]>;
17141735
}
17151736

17161737
// XL Compat intrinsics.

llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,13 @@
1111
//
1212
//===----------------------------------------------------------------------===//
1313

14+
// Mask immediates for MMA instructions (2, 4 and 8 bits).
15+
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
16+
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
17+
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
18+
19+
def MMA : Predicate<"Subtarget->hasMMA()">;
20+
1421
class XX3Form_AT3_XABp5_P1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
1522
string asmstr, list<dag> pattern>
1623
: I<opcode, OOL, IOL, asmstr, NoItinerary> {
@@ -69,6 +76,96 @@ class XForm_ATB3<bits<6> opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL,
6976
let Inst{31} = 0;
7077
}
7178

79+
// 32-bit instruction format with a 3-bit AT field, a 5-bit XAp field and a
// 6-bit XB field. XAp and XB are each split across the encoding: their low
// bits sit in the main operand fields and their top bit sits next to the
// extended opcode `xo`.
class XX3Form_AT3_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
80+
string asmstr, InstrItinClass itin,
81+
list<dag> pattern>
82+
: I<opcode, OOL, IOL, asmstr, itin> {
83+
bits<3> AT;
84+
bits<5> XAp;
85+
bits<6> XB;
86+
87+
let Pattern = pattern;
88+
89+
let Inst{6-8} = AT;
90+
let Inst{9-10} = 0;
91+
let Inst{11-14} = XAp{3-0};  // Low four bits of XAp.
92+
let Inst{15} = 0;
93+
let Inst{16-20} = XB{4-0};   // Low five bits of XB.
94+
let Inst{21-28} = xo;
95+
let Inst{29} = XAp{4};       // Top bit of XAp.
96+
let Inst{30} = XB{5};        // Top bit of XB.
97+
let Inst{31} = 0;
98+
}
99+
100+
// 64-bit prefixed instruction format. Bits 0-31 form the prefix word, which
// carries the PMSK (4-bit), XMSK (8-bit) and YMSK (4-bit) mask fields.
// Bits 32-63 form the instruction word with a 3-bit AT, a 5-bit XAp and a
// 6-bit XB, laid out like XX3Form_AT3_XAp5B6 shifted by 32 bits.
class MMIRR_XX3Form_X8YP4_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
101+
string asmstr, InstrItinClass itin,
102+
list<dag> pattern>
103+
: PI<1, opcode, OOL, IOL, asmstr, itin> {
104+
bits<3> AT;
105+
bits<5> XAp;  // Only XAp{4-0} are encoded below, matching XX3Form_AT3_XAp5B6.
106+
bits<6> XB;
107+
bits<8> XMSK;
108+
bits<4> YMSK;
109+
bits<4> PMSK;
110+
111+
let Pattern = pattern;
112+
113+
// The prefix.
114+
let Inst{6-7} = 3;
115+
let Inst{8-11} = 9;
116+
let Inst{12-15} = 0;
117+
let Inst{16-19} = PMSK;
118+
let Inst{20-27} = XMSK;
119+
let Inst{28-31} = YMSK;
120+
121+
// The instruction.
122+
let Inst{38-40} = AT;
123+
let Inst{41-42} = 0;
124+
let Inst{43-46} = XAp{3-0};  // Low four bits of XAp.
125+
let Inst{47} = 0;
126+
let Inst{48-52} = XB{4-0};   // Low five bits of XB.
127+
let Inst{53-60} = xo;
128+
let Inst{61} = XAp{4};       // Top bit of XAp.
129+
let Inst{62} = XB{5};        // Top bit of XB.
130+
let Inst{63} = 0;
131+
}
132+
133+
// Defines a non-accumulating / accumulating instruction pair on a DMR:
//   NAME   - writes $AT (early-clobber); extended opcode is `xo | 1`.
//   NAMEPP - takes $ATi tied to $AT as an extra input; extended opcode is `xo`.
// `asmbase` is the mnemonic and `asmstr` the operand string.
// The predicate list includes MMA so that these records are guarded the same
// way as DMXVI8GERX4SPP and the patterns that select NAME / NAMEPP.
multiclass DMR_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
134+
string asmstr> {
135+
let Predicates = [MMA, IsISAFuture] in {
136+
def NAME :
137+
XX3Form_AT3_XAp5B6<opcode, !or(xo, 0x01), (outs dmr:$AT), IOL,
138+
!strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
139+
RegConstraint<"@earlyclobber $AT">;
140+
def PP :
141+
XX3Form_AT3_XAp5B6<opcode, xo, (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
142+
!strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
143+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
144+
}
145+
}
146+
147+
// Extends DMR_UM_XOEO with the prefixed ("pm") forms of the same pair:
//   PM<NAME>   - non-accumulating, extended opcode `xo | 1`, early-clobber $AT.
//   PM<NAME>PP - accumulating, extended opcode `xo`, $ATi tied to $AT.
// Both prefixed forms append an 8-bit XMSK and 4-bit YMSK / PMSK immediates
// to the operand list and to the assembly string.
// The prefixed records require PrefixInstrs (and MMA), matching the
// predicates on PMDMXVI8GERX4SPP and on the patterns that select them.
multiclass DMR_UM_M448_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
148+
string asmstr> {
149+
defm NAME : DMR_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
150+
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
151+
def PM#NAME :
152+
MMIRR_XX3Form_X8YP4_XAp5B6<
153+
opcode, !or(xo, 0x01), (outs dmr:$AT),
154+
!con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
155+
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
156+
IIC_VecFP, []>,
157+
RegConstraint<"@earlyclobber $AT">;
158+
def PM#NAME#PP :
159+
MMIRR_XX3Form_X8YP4_XAp5B6<
160+
opcode, xo, (outs dmr:$AT),
161+
!con((ins dmr:$ATi),
162+
!con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
163+
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
164+
IIC_VecFP, []>,
165+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
166+
}
167+
}
168+
72169
let Predicates = [IsISAFuture] in {
73170
def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226,
74171
(outs vsrprc:$XAp, vsrprc:$XBp),
@@ -116,4 +213,56 @@ let Predicates = [IsISAFuture] in {
116213
def DMSETDMRZ : XForm_AT3<31, 2, 177, (outs dmr:$AT), (ins),
117214
"dmsetdmrz $AT", NoItinerary,
118215
[(set v1024i1:$AT, (int_ppc_mma_dmsetdmrz))]>;
216+
217+
// MMA+ accumulating/non-accumulating instructions.
218+
219+
// DMXVI8GERX4, DMXVI8GERX4PP, PMDMXVI8GERX4, PMDMXVI8GERX4PP
220+
defm DMXVI8GERX4 : DMR_UM_M448_XOEO<59, 10, (ins vsrprc:$XAp, vsrc:$XB),
221+
"dmxvi8gerx4", "$AT, $XAp, $XB">;
222+
223+
// dmxvi8gerx4spp: non-prefixed form with extended opcode 98. $ATi is an
// input tied to the output $AT and is not encoded separately.
let Predicates = [MMA, IsISAFuture] in {
224+
def DMXVI8GERX4SPP :
225+
XX3Form_AT3_XAp5B6<59, 98, (outs dmr:$AT), (ins dmr:$ATi, vsrprc:$XAp, vsrc:$XB),
226+
"dmxvi8gerx4spp $AT, $XAp, $XB", IIC_VecGeneral, []>,
227+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
228+
}
229+
230+
// pmdmxvi8gerx4spp: prefixed form of dmxvi8gerx4spp. It adds an 8-bit XMSK
// and 4-bit YMSK / PMSK immediates; $ATi is tied to $AT and not encoded.
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
231+
def PMDMXVI8GERX4SPP :
232+
MMIRR_XX3Form_X8YP4_XAp5B6<59, 98, (outs dmr:$AT),
233+
(ins dmr:$ATi, vsrprc:$XAp,vsrc:$XB, u8imm:$XMSK,
234+
u4imm:$YMSK, u4imm:$PMSK),
235+
"pmdmxvi8gerx4spp $AT, $XAp, $XB, $XMSK, $YMSK, $PMSK",
236+
IIC_VecGeneral, []>,
237+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
238+
}
239+
240+
// MMA+ Intrinsics
241+
// Selection patterns mapping the non-prefixed dmxvi8gerx4 intrinsics onto the
// corresponding instructions. The v16i8 operand is passed to the instruction
// as RCCp.BToVSRC; the pp / spp forms forward the incoming $ATi as well.
let Predicates = [MMA, IsISAFuture] in {
242+
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4 v256i1:$XAp, v16i8:$XB)),
243+
(DMXVI8GERX4 $XAp, RCCp.BToVSRC)>;
244+
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
245+
(DMXVI8GERX4PP $ATi, $XAp, RCCp.BToVSRC)>;
246+
247+
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
248+
(DMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC)>;
249+
}
250+
251+
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
252+
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4 v256i1:$XAp, v16i8:$XB, Msk8Imm:$XMSK,
253+
Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
254+
(PMDMXVI8GERX4 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
255+
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
256+
257+
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
258+
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
259+
Msk4Imm:$PMSK)),
260+
(PMDMXVI8GERX4PP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
261+
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
262+
263+
// Select PMDMXVI8GERX4SPP for the pmdmxvi8gerx4spp intrinsic. PMSK is matched
// as a 4-bit immediate because the instruction operand is u4imm:$PMSK; a
// 2-bit matcher would reject the PMSK values 4..15.
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
264+
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
265+
Msk4Imm:$PMSK)),
266+
(PMDMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
267+
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
119268
}

llvm/lib/Target/PowerPC/PPCInstrMMA.td

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,4 @@
11

2-
// Mask immediates for MMA instructions (2, 4 and 8 bits).
3-
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
4-
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
5-
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
6-
7-
def MMA : Predicate<"Subtarget->hasMMA()">;
8-
9-
102
// Multiclass definitions for MMA accumulator instructions.
113
// ----------------------------------------------------------------------------
124

0 commit comments

Comments
 (0)