Skip to content

Commit 0156914

Browse files
author
Baptiste Saleil
committed
[PowerPC] Legalize v256i1 and v512i1 and implement load and store of these types
This patch legalizes the v256i1 and v512i1 types that will be used for MMA. It implements loads and stores of these types. v256i1 is a pair of VSX registers, so for this type, we load/store the two underlying registers. v512i1 is used for MMA accumulators, so in addition to loading and storing the 4 associated VSX registers, we generate instructions to prime (copy the VSX registers to the accumulator) after loading and unprime (copy the accumulator back to the VSX registers) before storing. This patch also adds the UACC register class that is necessary to implement the loads and stores. This class represents accumulators in their unprimed form and allows primed and unprimed accumulators to be distinguished, which avoids invalid copies of the VSX registers associated with primed accumulators. Differential Revision: https://reviews.llvm.org/D84968
1 parent 33125cf commit 0156914

File tree

9 files changed

+532
-5
lines changed

9 files changed

+532
-5
lines changed

clang/lib/Basic/Targets/PPC.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -404,19 +404,20 @@ class LLVM_LIBRARY_VISIBILITY PPC64TargetInfo : public PPCTargetInfo {
404404
LongWidth = LongAlign = PointerWidth = PointerAlign = 64;
405405
IntMaxType = SignedLong;
406406
Int64Type = SignedLong;
407+
std::string DataLayout = "";
407408

408409
if (Triple.isOSAIX()) {
409410
// TODO: Set appropriate ABI for AIX platform.
410-
resetDataLayout("E-m:a-i64:64-n32:64");
411+
DataLayout = "E-m:a-i64:64-n32:64";
411412
SuitableAlign = 64;
412413
LongDoubleWidth = 64;
413414
LongDoubleAlign = DoubleAlign = 32;
414415
LongDoubleFormat = &llvm::APFloat::IEEEdouble();
415416
} else if ((Triple.getArch() == llvm::Triple::ppc64le)) {
416-
resetDataLayout("e-m:e-i64:64-n32:64");
417+
DataLayout = "e-m:e-i64:64-n32:64";
417418
ABI = "elfv2";
418419
} else {
419-
resetDataLayout("E-m:e-i64:64-n32:64");
420+
DataLayout = "E-m:e-i64:64-n32:64";
420421
ABI = "elfv1";
421422
}
422423

@@ -425,6 +426,10 @@ class LLVM_LIBRARY_VISIBILITY PPC64TargetInfo : public PPCTargetInfo {
425426
LongDoubleFormat = &llvm::APFloat::IEEEdouble();
426427
}
427428

429+
if (Triple.isOSAIX() || Triple.isOSLinux())
430+
DataLayout += "-v256:256:256-v512:512:512";
431+
resetDataLayout(DataLayout);
432+
428433
// PPC64 supports atomics up to 8 bytes.
429434
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
430435
}

clang/test/CodeGen/target-data.c

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,27 @@
136136

137137
// RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm %s | \
138138
// RUN: FileCheck %s -check-prefix=PPC64-LINUX
139-
// PPC64-LINUX: target datalayout = "E-m:e-i64:64-n32:64"
139+
// PPC64-LINUX: target datalayout = "E-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
140+
141+
// RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm -target-cpu future %s | \
142+
// RUN: FileCheck %s -check-prefix=PPC64-FUTURE
143+
// PPC64-FUTURE: target datalayout = "E-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
144+
145+
// RUN: %clang_cc1 -triple powerpc64-linux -o - -emit-llvm -target-cpu pwr10 %s | \
146+
// RUN: FileCheck %s -check-prefix=PPC64-P10
147+
// PPC64-P10: target datalayout = "E-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
140148

141149
// RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm %s | \
142150
// RUN: FileCheck %s -check-prefix=PPC64LE-LINUX
143-
// PPC64LE-LINUX: target datalayout = "e-m:e-i64:64-n32:64"
151+
// PPC64LE-LINUX: target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
152+
153+
// RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm -target-cpu future %s | \
154+
// RUN: FileCheck %s -check-prefix=PPC64LE-FUTURE
155+
// PPC64LE-FUTURE: target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
156+
157+
// RUN: %clang_cc1 -triple powerpc64le-linux -o - -emit-llvm -target-cpu pwr10 %s | \
158+
// RUN: FileCheck %s -check-prefix=PPC64LE-P10
159+
// PPC64LE-P10: target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
144160

145161
// RUN: %clang_cc1 -triple nvptx-unknown -o - -emit-llvm %s | \
146162
// RUN: FileCheck %s -check-prefix=NVPTX

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1181,6 +1181,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
11811181
}
11821182
}
11831183

1184+
if (Subtarget.pairedVectorMemops()) {
1185+
addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1186+
setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1187+
setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1188+
}
1189+
if (Subtarget.hasMMA()) {
1190+
addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1191+
setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1192+
setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1193+
setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1194+
}
1195+
11841196
if (Subtarget.has64BitSupport())
11851197
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
11861198

@@ -1523,6 +1535,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
15231535
return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
15241536
case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
15251537
return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1538+
case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1539+
case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1540+
case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1541+
case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
15261542
case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
15271543
case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
15281544
case PPCISD::STRICT_FADDRTZ:
@@ -7824,6 +7840,8 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
78247840
}
78257841

78267842
SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7843+
if (Op.getValueType().isVector())
7844+
return LowerVectorLoad(Op, DAG);
78277845

78287846
assert(Op.getValueType() == MVT::i1 &&
78297847
"Custom lowering only for i1 loads");
@@ -7847,6 +7865,9 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
78477865
}
78487866

78497867
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7868+
if (Op.getOperand(1).getValueType().isVector())
7869+
return LowerVectorStore(Op, DAG);
7870+
78507871
assert(Op.getOperand(1).getValueType() == MVT::i1 &&
78517872
"Custom lowering only for i1 stores");
78527873

@@ -10581,6 +10602,94 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
1058110602
return Op;
1058210603
}
1058310604

10605+
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10606+
SelectionDAG &DAG) const {
10607+
SDLoc dl(Op);
10608+
LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10609+
SDValue LoadChain = LN->getChain();
10610+
SDValue BasePtr = LN->getBasePtr();
10611+
EVT VT = Op.getValueType();
10612+
10613+
if (VT != MVT::v256i1 && VT != MVT::v512i1)
10614+
return Op;
10615+
10616+
// Type v256i1 is used for pairs and v512i1 is used for accumulators.
10617+
// Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10618+
// 2 or 4 vsx registers.
10619+
assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10620+
"Type unsupported without MMA");
10621+
assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10622+
"Type unsupported without paired vector support");
10623+
Align Alignment = LN->getAlign();
10624+
SmallVector<SDValue, 4> Loads;
10625+
SmallVector<SDValue, 4> LoadChains;
10626+
unsigned NumVecs = VT.getSizeInBits() / 128;
10627+
for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10628+
SDValue Load =
10629+
DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10630+
LN->getPointerInfo().getWithOffset(Idx * 16),
10631+
commonAlignment(Alignment, Idx * 16),
10632+
LN->getMemOperand()->getFlags(), LN->getAAInfo());
10633+
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10634+
DAG.getConstant(16, dl, BasePtr.getValueType()));
10635+
Loads.push_back(Load);
10636+
LoadChains.push_back(Load.getValue(1));
10637+
}
10638+
if (Subtarget.isLittleEndian()) {
10639+
std::reverse(Loads.begin(), Loads.end());
10640+
std::reverse(LoadChains.begin(), LoadChains.end());
10641+
}
10642+
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10643+
SDValue Value =
10644+
DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10645+
dl, VT, Loads);
10646+
SDValue RetOps[] = {Value, TF};
10647+
return DAG.getMergeValues(RetOps, dl);
10648+
}
10649+
10650+
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10651+
SelectionDAG &DAG) const {
10652+
SDLoc dl(Op);
10653+
StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10654+
SDValue StoreChain = SN->getChain();
10655+
SDValue BasePtr = SN->getBasePtr();
10656+
SDValue Value = SN->getValue();
10657+
EVT StoreVT = Value.getValueType();
10658+
10659+
if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10660+
return Op;
10661+
10662+
// Type v256i1 is used for pairs and v512i1 is used for accumulators.
10663+
// Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10664+
// underlying registers individually.
10665+
assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10666+
"Type unsupported without MMA");
10667+
assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10668+
"Type unsupported without paired vector support");
10669+
Align Alignment = SN->getAlign();
10670+
SmallVector<SDValue, 4> Stores;
10671+
unsigned NumVecs = 2;
10672+
if (StoreVT == MVT::v512i1) {
10673+
Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
10674+
NumVecs = 4;
10675+
}
10676+
for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10677+
unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10678+
SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10679+
DAG.getConstant(VecNum, dl, MVT::i64));
10680+
SDValue Store =
10681+
DAG.getStore(StoreChain, dl, Elt, BasePtr,
10682+
SN->getPointerInfo().getWithOffset(Idx * 16),
10683+
commonAlignment(Alignment, Idx * 16),
10684+
SN->getMemOperand()->getFlags(), SN->getAAInfo());
10685+
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10686+
DAG.getConstant(16, dl, BasePtr.getValueType()));
10687+
Stores.push_back(Store);
10688+
}
10689+
SDValue TF = DAG.getTokenFactor(dl, Stores);
10690+
return TF;
10691+
}
10692+
1058410693
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
1058510694
SDLoc dl(Op);
1058610695
if (Op.getValueType() == MVT::v4i32) {

llvm/lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,21 @@ namespace llvm {
450450
/// available. This is used with ADD_TLS to produce an add like PADDI.
451451
TLS_LOCAL_EXEC_MAT_ADDR,
452452

453+
/// ACC_BUILD = Build an accumulator register from 4 VSX registers.
454+
ACC_BUILD,
455+
456+
/// PAIR_BUILD = Build a vector pair register from 2 VSX registers.
457+
PAIR_BUILD,
458+
459+
/// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of
460+
/// an accumulator or pair register. This node is needed because
461+
/// EXTRACT_SUBVECTOR expects the input and output vectors to have the same
462+
/// element type.
463+
EXTRACT_VSX_REG,
464+
465+
/// XXMFACC = This corresponds to the xxmfacc instruction.
466+
XXMFACC,
467+
453468
// Constrained conversion from floating point to int
454469
STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE,
455470
STRICT_FCTIWZ,

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2465,6 +2465,31 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
24652465
auto DL = MI.getDebugLoc();
24662466

24672467
switch (MI.getOpcode()) {
2468+
case PPC::BUILD_UACC: {
2469+
MCRegister ACC = MI.getOperand(0).getReg();
2470+
MCRegister UACC = MI.getOperand(1).getReg();
2471+
if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {
2472+
MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;
2473+
MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;
2474+
// FIXME: This can easily be improved to look up to the top of the MBB
2475+
// to see if the inputs are XXLOR's. If they are and SrcReg is killed,
2476+
// we can just re-target any such XXLOR's to DstVSR + offset.
2477+
for (int VecNo = 0; VecNo < 4; VecNo++)
2478+
BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)
2479+
.addReg(SrcVSR + VecNo)
2480+
.addReg(SrcVSR + VecNo);
2481+
}
2482+
// BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
2483+
// So after building the 4 copies, we can replace the BUILD_UACC instruction
2484+
// with a NOP.
2485+
LLVM_FALLTHROUGH;
2486+
}
2487+
case PPC::KILL_PAIR: {
2488+
MI.setDesc(get(PPC::UNENCODED_NOP));
2489+
MI.RemoveOperand(1);
2490+
MI.RemoveOperand(0);
2491+
return true;
2492+
}
24682493
case TargetOpcode::LOAD_STACK_GUARD: {
24692494
assert(Subtarget.isTargetLinux() &&
24702495
"Only Linux target is expected to contain LOAD_STACK_GUARD");

llvm/lib/Target/PowerPC/PPCInstrPrefix.td

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,35 @@
55
def SDT_PPCSplat32 : SDTypeProfile<1, 3, [ SDTCisVT<0, v2i64>,
66
SDTCisVec<1>, SDTCisInt<2>, SDTCisInt<3>
77
]>;
8+
def SDT_PPCAccBuild : SDTypeProfile<1, 4, [
9+
SDTCisVT<0, v512i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>,
10+
SDTCisVT<3, v4i32>, SDTCisVT<4, v4i32>
11+
]>;
12+
def SDT_PPCPairBuild : SDTypeProfile<1, 2, [
13+
SDTCisVT<0, v256i1>, SDTCisVT<1, v4i32>, SDTCisVT<2, v4i32>
14+
]>;
15+
def SDT_PPCAccExtractVsx : SDTypeProfile<1, 2, [
16+
SDTCisVT<0, v4i32>, SDTCisVT<1, v512i1>, SDTCisInt<2>
17+
]>;
18+
def SDT_PPCPairExtractVsx : SDTypeProfile<1, 2, [
19+
SDTCisVT<0, v4i32>, SDTCisVT<1, v256i1>, SDTCisInt<2>
20+
]>;
21+
def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
22+
SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
23+
]>;
824

925
//===----------------------------------------------------------------------===//
1026
// ISA 3.1 specific PPCISD nodes.
1127
//
1228

1329
def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>;
30+
def PPCAccBuild : SDNode<"PPCISD::ACC_BUILD", SDT_PPCAccBuild, []>;
31+
def PPCPairBuild : SDNode<"PPCISD::PAIR_BUILD", SDT_PPCPairBuild, []>;
32+
def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
33+
[]>;
34+
def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
35+
[]>;
36+
def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
1437

1538
//===----------------------------------------------------------------------===//
1639

@@ -525,6 +548,16 @@ def vsrprc : RegisterOperand<VSRpRC> {
525548
let ParserMatchClass = PPCRegVSRpRCAsmOperand;
526549
}
527550

551+
def PPCRegVSRpEvenRCAsmOperand : AsmOperandClass {
552+
let Name = "RegVSRpEvenRC"; let PredicateMethod = "isVSRpEvenRegNumber";
553+
}
554+
555+
def vsrpevenrc : RegisterOperand<VSRpRC> {
556+
let ParserMatchClass = PPCRegVSRpEvenRCAsmOperand;
557+
let EncoderMethod = "getVSRpEvenEncoding";
558+
let DecoderMethod = "decodeVSRpEvenOperands";
559+
}
560+
528561
class DQForm_XTp5_RA17_MEM<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
529562
string asmstr, InstrItinClass itin, list<dag> pattern>
530563
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -594,6 +627,10 @@ def acc : RegisterOperand<ACCRC> {
594627
let ParserMatchClass = PPCRegACCRCAsmOperand;
595628
}
596629

630+
def uacc : RegisterOperand<UACCRC> {
631+
let ParserMatchClass = PPCRegACCRCAsmOperand;
632+
}
633+
597634
// [PO AS XO2 XO]
598635
class XForm_AT3<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL,
599636
string asmstr, InstrItinClass itin, list<dag> pattern>
@@ -774,6 +811,11 @@ let Predicates = [MMA] in {
774811
XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT",
775812
IIC_VecGeneral, []>, RegConstraint<"$ATi = $AT">,
776813
NoEncode<"$ATi">;
814+
def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp),
815+
"#KILL_PAIR", []>,
816+
RegConstraint<"$XTp = $XSp">;
817+
def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS),
818+
"#BUILD_UACC $AT, $AS", []>;
777819
// We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in
778820
// the backend. We avoid CSE here because it generates a copy of the acc
779821
// register and this copy is more expensive than calling the intrinsic again.
@@ -784,6 +826,51 @@ let Predicates = [MMA] in {
784826
}
785827
}
786828

829+
def Concats {
830+
dag VecsToVecPair0 =
831+
(v256i1 (INSERT_SUBREG
832+
(INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1),
833+
$vs1, sub_vsx0));
834+
dag VecsToVecPair1 =
835+
(v256i1 (INSERT_SUBREG
836+
(INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1),
837+
$vs3, sub_vsx0));
838+
dag VecsToVecQuad =
839+
(BUILD_UACC (INSERT_SUBREG
840+
(INSERT_SUBREG (v512i1 (IMPLICIT_DEF)),
841+
(KILL_PAIR VecsToVecPair0), sub_pair0),
842+
(KILL_PAIR VecsToVecPair1), sub_pair1));
843+
}
844+
845+
def Extracts {
846+
dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0));
847+
dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1));
848+
dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0));
849+
dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1));
850+
dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0));
851+
dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1));
852+
}
853+
854+
let Predicates = [MMA] in {
855+
def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)),
856+
(XXMTACC Concats.VecsToVecQuad)>;
857+
def : Pat<(v256i1 (PPCPairBuild v4i32:$vs1, v4i32:$vs0)),
858+
Concats.VecsToVecPair0>;
859+
def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>;
860+
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 0))),
861+
Extracts.Vec0>;
862+
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 1))),
863+
Extracts.Vec1>;
864+
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 2))),
865+
Extracts.Vec2>;
866+
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 3))),
867+
Extracts.Vec3>;
868+
def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, (i64 0))),
869+
(v4i32 (EXTRACT_SUBREG $v, sub_vsx0))>;
870+
def : Pat<(v4i32 (PPCPairExtractVsx vsrpevenrc:$v, (i64 1))),
871+
(v4i32 (EXTRACT_SUBREG $v, sub_vsx1))>;
872+
}
873+
787874
let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops] in {
788875
def LXVP : DQForm_XTp5_RA17_MEM<6, 0, (outs vsrprc:$XTp),
789876
(ins memrix16:$DQ_RA), "lxvp $XTp, $DQ_RA",

0 commit comments

Comments
 (0)