Skip to content

Commit 05f1ca7

Browse files
authored
[PowerPC] Spill and restore DMR register (llvm#141530)
Add spilling and restoring of DMR registers.
1 parent def17cd commit 05f1ca7

File tree

6 files changed

+272
-0
lines changed

6 files changed

+272
-0
lines changed

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1921,6 +1921,14 @@ unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {
19211921
OpcodeIndex = SOK_PairedVecSpill;
19221922
} else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {
19231923
OpcodeIndex = SOK_PairedG8Spill;
1924+
} else if (PPC::DMRROWRCRegClass.hasSubClassEq(RC)) {
1925+
llvm_unreachable("TODO: Implement spill DMRROW regclass!");
1926+
} else if (PPC::DMRROWpRCRegClass.hasSubClassEq(RC)) {
1927+
llvm_unreachable("TODO: Implement spill DMRROWp regclass!");
1928+
} else if (PPC::DMRpRCRegClass.hasSubClassEq(RC)) {
1929+
llvm_unreachable("TODO: Implement spill DMRp regclass!");
1930+
} else if (PPC::DMRRCRegClass.hasSubClassEq(RC)) {
1931+
OpcodeIndex = SOK_DMRSpill;
19241932
} else {
19251933
llvm_unreachable("Unknown regclass!");
19261934
}

llvm/lib/Target/PowerPC/PPCInstrInfo.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ enum SpillOpcodeKey {
8181
SOK_AccumulatorSpill,
8282
SOK_UAccumulatorSpill,
8383
SOK_WAccumulatorSpill,
84+
SOK_DMRSpill,
8485
SOK_SPESpill,
8586
SOK_PairedG8Spill,
8687
SOK_LastOpcodeSpill // This must be last on the enum.
@@ -117,6 +118,7 @@ enum PPCMachineCombinerPattern : unsigned {
117118
NoInstr, \
118119
NoInstr, \
119120
NoInstr, \
121+
NoInstr, \
120122
PPC::EVLDD, \
121123
PPC::RESTORE_QUADWORD}
122124

@@ -137,6 +139,7 @@ enum PPCMachineCombinerPattern : unsigned {
137139
NoInstr, \
138140
NoInstr, \
139141
NoInstr, \
142+
NoInstr, \
140143
PPC::RESTORE_QUADWORD}
141144

142145
#define Pwr10LoadOpcodes \
@@ -156,6 +159,7 @@ enum PPCMachineCombinerPattern : unsigned {
156159
PPC::RESTORE_UACC, \
157160
NoInstr, \
158161
NoInstr, \
162+
NoInstr, \
159163
PPC::RESTORE_QUADWORD}
160164

161165
#define FutureLoadOpcodes \
@@ -174,6 +178,7 @@ enum PPCMachineCombinerPattern : unsigned {
174178
PPC::RESTORE_ACC, \
175179
PPC::RESTORE_UACC, \
176180
PPC::RESTORE_WACC, \
181+
PPC::RESTORE_DMR, \
177182
NoInstr, \
178183
PPC::RESTORE_QUADWORD}
179184

@@ -193,6 +198,7 @@ enum PPCMachineCombinerPattern : unsigned {
193198
NoInstr, \
194199
NoInstr, \
195200
NoInstr, \
201+
NoInstr, \
196202
PPC::EVSTDD, \
197203
PPC::SPILL_QUADWORD}
198204

@@ -213,6 +219,7 @@ enum PPCMachineCombinerPattern : unsigned {
213219
NoInstr, \
214220
NoInstr, \
215221
NoInstr, \
222+
NoInstr, \
216223
PPC::SPILL_QUADWORD}
217224

218225
#define Pwr10StoreOpcodes \
@@ -232,6 +239,7 @@ enum PPCMachineCombinerPattern : unsigned {
232239
PPC::SPILL_UACC, \
233240
NoInstr, \
234241
NoInstr, \
242+
NoInstr, \
235243
PPC::SPILL_QUADWORD}
236244

237245
#define FutureStoreOpcodes \
@@ -250,6 +258,7 @@ enum PPCMachineCombinerPattern : unsigned {
250258
PPC::SPILL_ACC, \
251259
PPC::SPILL_UACC, \
252260
PPC::SPILL_WACC, \
261+
PPC::SPILL_DMR, \
253262
NoInstr, \
254263
PPC::SPILL_QUADWORD}
255264

llvm/lib/Target/PowerPC/PPCInstrMMA.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -565,10 +565,14 @@ let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
565565
let mayStore = 1 in {
566566
def SPILL_WACC: PPCEmitTimePseudo<(outs), (ins wacc:$AT, memrix16:$dst),
567567
"#SPILL_WACC", []>;
568+
def SPILL_DMR: PPCEmitTimePseudo<(outs), (ins dmr:$AT, memrix16:$dst),
569+
"#SPILL_DMR", []>;
568570
}
569571
let mayLoad = 1, hasSideEffects = 0 in {
570572
def RESTORE_WACC: PPCEmitTimePseudo<(outs wacc:$AT), (ins memrix16:$src),
571573
"#RESTORE_WACC", []>;
574+
def RESTORE_DMR: PPCEmitTimePseudo<(outs dmr:$AT), (ins memrix16:$src),
575+
"#RESTORE_DMR", []>;
572576
}
573577
}
574578

llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,6 +1509,95 @@ void PPCRegisterInfo::lowerQuadwordRestore(MachineBasicBlock::iterator II,
15091509
MBB.erase(II);
15101510
}
15111511

1512+
/// lowerDMRSpilling - Generate the code for spilling the DMR register.
///
/// Replaces the SPILL_DMR pseudo instruction at \p II with two extract
/// instructions (one per half of the DMR) followed by four STXVP stores into
/// the stack object identified by \p FrameIndex, then erases the pseudo.
1513+
void PPCRegisterInfo::lowerDMRSpilling(MachineBasicBlock::iterator II,
1514+
unsigned FrameIndex) const {
1515+
MachineInstr &MI = *II; // SPILL_DMR <SrcReg>, <offset>
1516+
MachineBasicBlock &MBB = *MI.getParent();
1517+
MachineFunction &MF = *MBB.getParent();
1518+
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
1519+
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
1520+
DebugLoc DL = MI.getDebugLoc();
1521+
bool IsLittleEndian = Subtarget.isLittleEndian();
1522+
1523+
// A DMR is made up of a WACC and a WACC_HI half, so each half is extracted
1524+
// with DMXXEXTFDMR512(_HI) into a pair of VSRp registers before being stored.
1525+
const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
1526+
Register SrcReg = MI.getOperand(0).getReg();
1527+
1528+
// Four VSRp virtual registers hold the extracted contents: 0/1 receive the
// sub_wacc_lo half and 2/3 receive the sub_wacc_hi half.
Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
1529+
Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
1530+
Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
1531+
Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
1532+
1533+
BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512_HI), VSRpReg2)
1534+
.addDef(VSRpReg3)
1535+
.addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_hi));
1536+
1537+
BuildMI(MBB, II, DL, TII.get(PPC::DMXXEXTFDMR512), VSRpReg0)
1538+
.addDef(VSRpReg1)
1539+
.addReg(TargetRegisterInfo::getSubReg(SrcReg, PPC::sub_wacc_lo));
1540+
1541+
// Store the four pairs at offsets 0, 32, 64 and 96 of the frame object. The
// register-to-offset assignment is mirrored between little- and big-endian.
// Each virtual register is killed by its store.
addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
1542+
.addReg(VSRpReg0, RegState::Kill),
1543+
FrameIndex, IsLittleEndian ? 96 : 0);
1544+
addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
1545+
.addReg(VSRpReg1, RegState::Kill),
1546+
FrameIndex, IsLittleEndian ? 64 : 32);
1547+
addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
1548+
.addReg(VSRpReg2, RegState::Kill),
1549+
FrameIndex, IsLittleEndian ? 32 : 64);
1550+
addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
1551+
.addReg(VSRpReg3, RegState::Kill),
1552+
FrameIndex, IsLittleEndian ? 0 : 96);
1553+
1554+
// Discard the pseudo instruction.
1555+
MBB.erase(II);
1556+
}
1557+
1558+
/// lowerDMRRestore - Generate the code to restore the DMR register.
///
/// Replaces the RESTORE_DMR pseudo instruction at \p II with four LXVP loads
/// from the stack object identified by \p FrameIndex followed by two insert
/// instructions (one per half of the DMR), then erases the pseudo.
1559+
void PPCRegisterInfo::lowerDMRRestore(MachineBasicBlock::iterator II,
1560+
unsigned FrameIndex) const {
1561+
MachineInstr &MI = *II; // <DestReg> = RESTORE_DMR <offset>
1562+
MachineBasicBlock &MBB = *MI.getParent();
1563+
MachineFunction &MF = *MBB.getParent();
1564+
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
1565+
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
1566+
DebugLoc DL = MI.getDebugLoc();
1567+
bool IsLittleEndian = Subtarget.isLittleEndian();
1568+
1569+
const TargetRegisterClass *RC = &PPC::VSRpRCRegClass;
1570+
Register DestReg = MI.getOperand(0).getReg();
1571+
1572+
// Four VSRp virtual registers receive the reloaded contents: 0/1 feed the
// sub_wacc_lo half and 2/3 feed the sub_wacc_hi half.
Register VSRpReg0 = MF.getRegInfo().createVirtualRegister(RC);
1573+
Register VSRpReg1 = MF.getRegInfo().createVirtualRegister(RC);
1574+
Register VSRpReg2 = MF.getRegInfo().createVirtualRegister(RC);
1575+
Register VSRpReg3 = MF.getRegInfo().createVirtualRegister(RC);
1576+
1577+
// Load the four pairs from offsets 0, 32, 64 and 96 of the frame object,
// using the same endian-dependent register-to-offset assignment as
// lowerDMRSpilling.
addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg0),
1578+
FrameIndex, IsLittleEndian ? 96 : 0);
1579+
addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg1),
1580+
FrameIndex, IsLittleEndian ? 64 : 32);
1581+
addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg2),
1582+
FrameIndex, IsLittleEndian ? 32 : 64);
1583+
addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LXVP), VSRpReg3),
1584+
FrameIndex, IsLittleEndian ? 0 : 96);
1585+
1586+
// Insert the loaded pairs into the two halves of the DMR. Each VSRp virtual
// register is marked as killed (RegState::Kill) at this, its last use.
1587+
BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512_HI),
1588+
TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_hi))
1589+
.addReg(VSRpReg2, RegState::Kill)
1590+
.addReg(VSRpReg3, RegState::Kill);
1591+
1592+
BuildMI(MBB, II, DL, TII.get(PPC::DMXXINSTDMR512),
1593+
TargetRegisterInfo::getSubReg(DestReg, PPC::sub_wacc_lo))
1594+
.addReg(VSRpReg0, RegState::Kill)
1595+
.addReg(VSRpReg1, RegState::Kill);
1596+
1597+
// Discard the pseudo instruction.
1598+
MBB.erase(II);
1599+
}
1600+
15121601
bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
15131602
Register Reg, int &FrameIdx) const {
15141603
// For the nonvolatile condition registers (CR2, CR3, CR4) return true to
@@ -1671,6 +1760,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
16711760
case PPC::RESTORE_WACC:
16721761
lowerWACCRestore(II, FrameIndex);
16731762
return true;
1763+
case PPC::SPILL_DMR:
1764+
lowerDMRSpilling(II, FrameIndex);
1765+
return true;
1766+
case PPC::RESTORE_DMR:
1767+
lowerDMRRestore(II, FrameIndex);
1768+
return true;
16741769
case PPC::SPILL_QUADWORD:
16751770
lowerQuadwordSpilling(II, FrameIndex);
16761771
return true;

llvm/lib/Target/PowerPC/PPCRegisterInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,11 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
151151
void lowerQuadwordRestore(MachineBasicBlock::iterator II,
152152
unsigned FrameIndex) const;
153153

154+
/// Lower the SPILL_DMR pseudo instruction at \p II, which references the
/// stack object \p FrameIndex, into real instructions.
void lowerDMRSpilling(MachineBasicBlock::iterator II,
155+
unsigned FrameIndex) const;
156+
/// Lower the RESTORE_DMR pseudo instruction at \p II, which references the
/// stack object \p FrameIndex, into real instructions.
void lowerDMRRestore(MachineBasicBlock::iterator II,
157+
unsigned FrameIndex) const;
158+
154159
static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg,
155160
MCRegister SrcReg);
156161

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3+
; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
4+
; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s
5+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
6+
; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
7+
; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX
8+
; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
9+
; RUN: -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
10+
; RUN: -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX32
11+
12+
declare <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1>, <256 x i1>, <16 x i8>)
13+
declare void @dummy_func()
14+
15+
define void @spillDMRreg(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) nounwind {
16+
; CHECK-LABEL: spillDMRreg:
17+
; CHECK: # %bb.0:
18+
; CHECK-NEXT: mflr r0
19+
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
20+
; CHECK-NEXT: std r0, 16(r1)
21+
; CHECK-NEXT: stdu r1, -176(r1)
22+
; CHECK-NEXT: lxvp vsp34, 0(r3)
23+
; CHECK-NEXT: lxvp vsp36, 32(r3)
24+
; CHECK-NEXT: mr r30, r6
25+
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
26+
; CHECK-NEXT: lxvp vsp34, 64(r3)
27+
; CHECK-NEXT: lxvp vsp36, 96(r3)
28+
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
29+
; CHECK-NEXT: lxv v2, 16(r4)
30+
; CHECK-NEXT: lxv v3, 0(r4)
31+
; CHECK-NEXT: lxv vs0, 0(r5)
32+
; CHECK-NEXT: dmxvbf16gerx2pp dmr0, vsp34, vs0
33+
; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
34+
; CHECK-NEXT: dmxxextfdmr512 vsp38, vsp32, wacc0, 0
35+
; CHECK-NEXT: stxvp vsp38, 128(r1)
36+
; CHECK-NEXT: stxvp vsp32, 96(r1)
37+
; CHECK-NEXT: stxvp vsp36, 64(r1)
38+
; CHECK-NEXT: stxvp vsp34, 32(r1)
39+
; CHECK-NEXT: bl dummy_func@notoc
40+
; CHECK-NEXT: lxvp vsp34, 128(r1)
41+
; CHECK-NEXT: lxvp vsp36, 96(r1)
42+
; CHECK-NEXT: lxvp vsp32, 64(r1)
43+
; CHECK-NEXT: lxvp vsp38, 32(r1)
44+
; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1
45+
; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
46+
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
47+
; CHECK-NEXT: stxvp vsp34, 96(r30)
48+
; CHECK-NEXT: stxvp vsp36, 64(r30)
49+
; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
50+
; CHECK-NEXT: stxvp vsp34, 32(r30)
51+
; CHECK-NEXT: stxvp vsp36, 0(r30)
52+
; CHECK-NEXT: addi r1, r1, 176
53+
; CHECK-NEXT: ld r0, 16(r1)
54+
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
55+
; CHECK-NEXT: mtlr r0
56+
; CHECK-NEXT: blr
57+
;
58+
; AIX-LABEL: spillDMRreg:
59+
; AIX: # %bb.0:
60+
; AIX-NEXT: mflr r0
61+
; AIX-NEXT: std r0, 16(r1)
62+
; AIX-NEXT: stdu r1, -256(r1)
63+
; AIX-NEXT: std r31, 248(r1) # 8-byte Folded Spill
64+
; AIX-NEXT: lxvp vsp34, 96(r3)
65+
; AIX-NEXT: lxvp vsp36, 64(r3)
66+
; AIX-NEXT: mr r31, r6
67+
; AIX-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
68+
; AIX-NEXT: lxvp vsp34, 32(r3)
69+
; AIX-NEXT: lxvp vsp36, 0(r3)
70+
; AIX-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
71+
; AIX-NEXT: lxv v2, 0(r4)
72+
; AIX-NEXT: lxv v3, 16(r4)
73+
; AIX-NEXT: lxv vs0, 0(r5)
74+
; AIX-NEXT: dmxvbf16gerx2pp dmr0, vsp34, vs0
75+
; AIX-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
76+
; AIX-NEXT: dmxxextfdmr512 vsp38, vsp32, wacc0, 0
77+
; AIX-NEXT: stxvp vsp38, 112(r1)
78+
; AIX-NEXT: stxvp vsp32, 144(r1)
79+
; AIX-NEXT: stxvp vsp36, 176(r1)
80+
; AIX-NEXT: stxvp vsp34, 208(r1)
81+
; AIX-NEXT: bl .dummy_func[PR]
82+
; AIX-NEXT: nop
83+
; AIX-NEXT: lxvp vsp34, 112(r1)
84+
; AIX-NEXT: lxvp vsp36, 144(r1)
85+
; AIX-NEXT: lxvp vsp32, 176(r1)
86+
; AIX-NEXT: lxvp vsp38, 208(r1)
87+
; AIX-NEXT: dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1
88+
; AIX-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
89+
; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
90+
; AIX-NEXT: stxvp vsp36, 96(r31)
91+
; AIX-NEXT: stxvp vsp34, 64(r31)
92+
; AIX-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
93+
; AIX-NEXT: stxvp vsp36, 32(r31)
94+
; AIX-NEXT: stxvp vsp34, 0(r31)
95+
; AIX-NEXT: ld r31, 248(r1) # 8-byte Folded Reload
96+
; AIX-NEXT: addi r1, r1, 256
97+
; AIX-NEXT: ld r0, 16(r1)
98+
; AIX-NEXT: mtlr r0
99+
; AIX-NEXT: blr
100+
;
101+
; AIX32-LABEL: spillDMRreg:
102+
; AIX32: # %bb.0:
103+
; AIX32-NEXT: mflr r0
104+
; AIX32-NEXT: stw r0, 8(r1)
105+
; AIX32-NEXT: stwu r1, -208(r1)
106+
; AIX32-NEXT: stw r31, 204(r1) # 4-byte Folded Spill
107+
; AIX32-NEXT: lxvp vsp34, 96(r3)
108+
; AIX32-NEXT: lxvp vsp36, 64(r3)
109+
; AIX32-NEXT: mr r31, r6
110+
; AIX32-NEXT: dmxxinstdmr512 wacc_hi0, vsp36, vsp34, 1
111+
; AIX32-NEXT: lxvp vsp34, 32(r3)
112+
; AIX32-NEXT: lxvp vsp36, 0(r3)
113+
; AIX32-NEXT: dmxxinstdmr512 wacc0, vsp36, vsp34, 0
114+
; AIX32-NEXT: lxv v2, 0(r4)
115+
; AIX32-NEXT: lxv v3, 16(r4)
116+
; AIX32-NEXT: lxv vs0, 0(r5)
117+
; AIX32-NEXT: dmxvbf16gerx2pp dmr0, vsp34, vs0
118+
; AIX32-NEXT: dmxxextfdmr512 vsp36, vsp34, wacc_hi0, 1
119+
; AIX32-NEXT: dmxxextfdmr512 vsp38, vsp32, wacc0, 0
120+
; AIX32-NEXT: stxvp vsp38, 64(r1)
121+
; AIX32-NEXT: stxvp vsp32, 96(r1)
122+
; AIX32-NEXT: stxvp vsp36, 128(r1)
123+
; AIX32-NEXT: stxvp vsp34, 160(r1)
124+
; AIX32-NEXT: bl .dummy_func[PR]
125+
; AIX32-NEXT: nop
126+
; AIX32-NEXT: lxvp vsp34, 64(r1)
127+
; AIX32-NEXT: lxvp vsp36, 96(r1)
128+
; AIX32-NEXT: lxvp vsp32, 128(r1)
129+
; AIX32-NEXT: lxvp vsp38, 160(r1)
130+
; AIX32-NEXT: dmxxinstdmr512 wacc_hi0, vsp32, vsp38, 1
131+
; AIX32-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
132+
; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
133+
; AIX32-NEXT: stxvp vsp36, 96(r31)
134+
; AIX32-NEXT: stxvp vsp34, 64(r31)
135+
; AIX32-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
136+
; AIX32-NEXT: stxvp vsp36, 32(r31)
137+
; AIX32-NEXT: stxvp vsp34, 0(r31)
138+
; AIX32-NEXT: lwz r31, 204(r1) # 4-byte Folded Reload
139+
; AIX32-NEXT: addi r1, r1, 208
140+
; AIX32-NEXT: lwz r0, 8(r1)
141+
; AIX32-NEXT: mtlr r0
142+
; AIX32-NEXT: blr
143+
%v.dmr = load <1024 x i1>, ptr %vop, align 64
144+
%v1 = load <256 x i1>, ptr %vpp, align 32
145+
%v2 = load <16 x i8>, ptr %vcp, align 32
146+
%call = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2)
147+
tail call void @dummy_func()
148+
%call2 = tail call <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2)
149+
store <1024 x i1> %call, ptr %resp, align 64
150+
ret void
151+
}

0 commit comments

Comments
 (0)