Skip to content

Commit edb2791

Browse files
Baptiste Saleil authored and Ahsan Saghir committed
[PowerPC] Add intrinsics for MMA
This patch adds support for MMA intrinsics.
Authored by: Baptiste Saleil
Reviewed By: #powerpc, bsaleil, amyk
Differential Revision: https://reviews.llvm.org/D89345
1 parent dd887d9 commit edb2791

File tree

5 files changed

+2767
-2
lines changed

5 files changed

+2767
-2
lines changed

llvm/include/llvm/IR/IntrinsicsPowerPC.td

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,28 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
141141
Intrinsic<ret_types, param_types, properties>;
142142
}
143143

144+
//===----------------------------------------------------------------------===//
145+
// PowerPC MMA Intrinsic Multi Class Definitions.
146+
//
147+
148+
// Defines five accumulator intrinsics from a single defm name:
//   NAME                            - operands are exactly `args`.
//   NAMEpp, NAMEpn, NAMEnp, NAMEnn  - operands are a v512i1 value followed by
//                                     `args` (built with !listconcat).
// Every variant returns v512i1 and is marked IntrNoMem.
multiclass PowerPC_MMA_ACC_Intrinsic<list<LLVMType> args> {
149+
def NAME: Intrinsic<[llvm_v512i1_ty], args, [IntrNoMem]>;
150+
def pp : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
151+
[IntrNoMem]>;
152+
def pn : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
153+
[IntrNoMem]>;
154+
def np : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
155+
[IntrNoMem]>;
156+
def nn : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
157+
[IntrNoMem]>;
158+
}
159+
160+
// Reduced variant of the accumulator multiclass: defines only NAME (operands
// are `args`) and NAMEpp (a v512i1 value followed by `args`). Both return
// v512i1 and are marked IntrNoMem. Used for operations that have no
// pn/np/nn forms.
multiclass PowerPC_MMA_ACC_PP_Intrinsic<list<LLVMType> args> {
161+
def NAME: Intrinsic<[llvm_v512i1_ty], args, [IntrNoMem]>;
162+
def pp : Intrinsic<[llvm_v512i1_ty], !listconcat([llvm_v512i1_ty], args),
163+
[IntrNoMem]>;
164+
}
165+
144166
//===----------------------------------------------------------------------===//
145167
// PowerPC Altivec Intrinsic Class Definitions.
146168
//
@@ -1371,7 +1393,6 @@ def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>;
13711393
// PowerPC set FPSCR Intrinsic Definitions.
13721394
def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">,
13731395
Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>;
1374-
13751396
}
13761397

13771398
let TargetPrefix = "ppc" in {
@@ -1400,5 +1421,60 @@ let TargetPrefix = "ppc" in {
14001421

14011422
def int_ppc_mma_xxsetaccz :
14021423
Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;
1403-
}
14041424

1425+
// MMA Reduced-Precision: Outer Product Intrinsic Definitions.
1426+
defm int_ppc_mma_xvi4ger8 :
1427+
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
1428+
defm int_ppc_mma_pmxvi4ger8 :
1429+
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
1430+
llvm_i32_ty, llvm_i32_ty]>;
1431+
1432+
defm int_ppc_mma_xvi8ger4 :
1433+
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
1434+
defm int_ppc_mma_pmxvi8ger4 :
1435+
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
1436+
llvm_i32_ty, llvm_i32_ty]>;
1437+
1438+
defm int_ppc_mma_xvi16ger2s :
1439+
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
1440+
defm int_ppc_mma_pmxvi16ger2s :
1441+
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
1442+
llvm_i32_ty, llvm_i32_ty]>;
1443+
1444+
defm int_ppc_mma_xvf16ger2 :
1445+
PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
1446+
defm int_ppc_mma_pmxvf16ger2 :
1447+
PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
1448+
llvm_i32_ty, llvm_i32_ty]>;
1449+
defm int_ppc_mma_xvf32ger :
1450+
PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
1451+
defm int_ppc_mma_pmxvf32ger :
1452+
PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
1453+
llvm_i32_ty]>;
1454+
defm int_ppc_mma_xvf64ger :
1455+
PowerPC_MMA_ACC_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>;
1456+
defm int_ppc_mma_pmxvf64ger :
1457+
PowerPC_MMA_ACC_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
1458+
llvm_i32_ty]>;
1459+
1460+
// MMA Reduced-Precision: bfloat16 Outer Product Intrinsic Definitions.
1461+
defm int_ppc_mma_xvbf16ger2 :
1462+
PowerPC_MMA_ACC_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
1463+
defm int_ppc_mma_pmxvbf16ger2 :
1464+
PowerPC_MMA_ACC_Intrinsic<
1465+
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
1466+
1467+
// MMA Reduced-Precision: Missing Integer-based Outer Product Operations.
1468+
defm int_ppc_mma_xvi16ger2 :
1469+
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty]>;
1470+
defm int_ppc_mma_pmxvi16ger2 :
1471+
PowerPC_MMA_ACC_PP_Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
1472+
llvm_i32_ty, llvm_i32_ty]>;
1473+
def int_ppc_mma_xvi8ger4spp :
1474+
Intrinsic<[llvm_v512i1_ty],
1475+
[llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
1476+
def int_ppc_mma_pmxvi8ger4spp :
1477+
Intrinsic<[llvm_v512i1_ty],
1478+
[llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
1479+
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
1480+
}

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
// Mask immediates for MMA instructions (2, 4 and 8 bits).
// Each leaf matches an i32 immediate only when isUInt<N>(Imm) holds, i.e. the
// value is representable as an unsigned N-bit integer. A pattern that uses one
// of these leaves does not match when the mask operand is out of range.
2+
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
3+
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
4+
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
5+
16
//===----------------------------------------------------------------------===//
27
// PowerPC ISA 3.1 specific type constraints.
38
//
@@ -1341,6 +1346,220 @@ defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB),
13411346
"xvf64ger", "$AT, $XA, $XB">;
13421347
//------------------------------------------------------------------------------
13431348

1349+
// MMA Intrinsics
1350+
let Predicates = [MMA] in {
1351+
def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
1352+
(XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>;
1353+
def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1354+
(XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1355+
1356+
def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)),
1357+
(XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>;
1358+
def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1359+
(XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1360+
1361+
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)),
1362+
(XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>;
1363+
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1364+
(XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1365+
1366+
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
1367+
(XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
1368+
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1369+
(XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1370+
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1371+
(XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1372+
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1373+
(XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1374+
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1375+
(XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1376+
1377+
def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
1378+
(XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>;
1379+
def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1380+
(XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1381+
def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1382+
(XVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1383+
def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1384+
(XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1385+
def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1386+
(XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1387+
def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)),
1388+
(XVF64GER $XA, RCCp.BToVSRC)>;
1389+
def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
1390+
(XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>;
1391+
def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
1392+
(XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>;
1393+
def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
1394+
(XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>;
1395+
def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
1396+
(XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>;
1397+
1398+
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)),
1399+
(XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
1400+
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1401+
(XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1402+
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1403+
(XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1404+
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1405+
(XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1406+
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1407+
(XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1408+
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)),
1409+
(XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
1410+
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1411+
(XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1412+
def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
1413+
(XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
1414+
}
1415+
1416+
// Prefixed MMA Intrinsics (pm* forms taking immediate mask operands)
1417+
let Predicates = [MMA, PrefixInstrs] in {
1418+
def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
1419+
Msk4Imm:$YMSK, Msk8Imm:$PMSK)),
1420+
(PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1421+
Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
1422+
def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1423+
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
1424+
Msk8Imm:$PMSK)),
1425+
(PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1426+
Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
1427+
1428+
def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
1429+
Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
1430+
(PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1431+
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
1432+
def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1433+
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
1434+
Msk4Imm:$PMSK)),
1435+
(PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1436+
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
1437+
1438+
def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
1439+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
1440+
(PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1441+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1442+
def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1443+
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
1444+
Msk2Imm:$PMSK)),
1445+
(PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1446+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1447+
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
1448+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
1449+
(PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1450+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1451+
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1452+
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
1453+
Msk2Imm:$PMSK)),
1454+
(PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1455+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1456+
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1457+
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
1458+
Msk2Imm:$PMSK)),
1459+
(PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1460+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1461+
// Selects PMXVF16GER2NP for the np form of the masked xvf16ger2 intrinsic.
// XMSK and YMSK must be 4-bit immediates and PMSK a 2-bit immediate.
// Each of the pp/pn/np/nn variants needs exactly one selection pattern.
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1462+
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
1463+
Msk2Imm:$PMSK)),
1464+
(PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1465+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1476+
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1477+
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
1478+
Msk2Imm:$PMSK)),
1479+
(PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1480+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1481+
1482+
def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
1483+
Msk4Imm:$YMSK)),
1484+
(PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1485+
Msk4Imm:$YMSK)>;
1486+
def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1487+
Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
1488+
(PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1489+
Msk4Imm:$YMSK)>;
1490+
def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1491+
Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
1492+
(PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1493+
Msk4Imm:$YMSK)>;
1494+
def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1495+
Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
1496+
(PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1497+
Msk4Imm:$YMSK)>;
1498+
def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1499+
Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
1500+
(PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1501+
Msk4Imm:$YMSK)>;
1502+
1503+
def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK,
1504+
Msk2Imm:$YMSK)),
1505+
(PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
1506+
def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
1507+
Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
1508+
(PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
1509+
Msk2Imm:$YMSK)>;
1510+
def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
1511+
Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
1512+
(PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
1513+
Msk2Imm:$YMSK)>;
1514+
def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
1515+
Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
1516+
(PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
1517+
Msk2Imm:$YMSK)>;
1518+
def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
1519+
Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
1520+
(PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
1521+
Msk2Imm:$YMSK)>;
1522+
1523+
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
1524+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
1525+
(PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1526+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1527+
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1528+
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
1529+
Msk2Imm:$PMSK)),
1530+
(PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1531+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1532+
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1533+
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
1534+
Msk2Imm:$PMSK)),
1535+
(PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1536+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1537+
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1538+
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
1539+
Msk2Imm:$PMSK)),
1540+
(PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1541+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1542+
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1543+
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
1544+
Msk2Imm:$PMSK)),
1545+
(PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1546+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1547+
def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
1548+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
1549+
(PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1550+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1551+
// Selects PMXVI8GER4SPP for the masked xvi8ger4spp intrinsic.
// PMSK is matched as a 4-bit immediate, the same width the pmxvi8ger4 and
// pmxvi8ger4pp patterns use; a 2-bit constraint here would leave mask values
// 4..15 unselectable for this form only.
// NOTE(review): confirm that the PMXVI8GER4SPP instruction definition declares
// a 4-bit PMSK operand.
def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1552+
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
1553+
Msk4Imm:$PMSK)),
1554+
(PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1555+
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
1556+
def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
1557+
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
1558+
Msk2Imm:$PMSK)),
1559+
(PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
1560+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
1561+
}
1562+
13441563
def Concats {
13451564
dag VecsToVecPair0 =
13461565
(v256i1 (INSERT_SUBREG

0 commit comments

Comments
 (0)