Skip to content

Commit cc45e60

Browse files
committed
RegMem patch.
1 parent 3876634 commit cc45e60

21 files changed

+886
-41
lines changed

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1700,7 +1700,7 @@ class TargetInstrInfo : public MCInstrInfo {
17001700
/// instruction that defines FoldAsLoadDefReg, and the function returns
17011701
/// the machine instruction generated due to folding.
17021702
virtual MachineInstr *optimizeLoadInstr(MachineInstr &MI,
1703-
const MachineRegisterInfo *MRI,
1703+
MachineRegisterInfo *MRI,
17041704
Register &FoldAsLoadDefReg,
17051705
MachineInstr *&DefMI) const {
17061706
return nullptr;

llvm/lib/CodeGen/PeepholeOptimizer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1868,6 +1868,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
18681868
// If we run into an instruction we can't fold across, discard
18691869
// the load candidates. Note: We might be able to fold *into* this
18701870
// instruction, so this needs to be after the folding logic.
1871+
// TODO: Try AA for a store?
18711872
if (MI->isLoadFoldBarrier()) {
18721873
LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
18731874
FoldAsLoadDefCandidates.clear();

llvm/lib/Target/SystemZ/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ add_llvm_target(SystemZCodeGen
2020
SystemZConstantPoolValue.cpp
2121
SystemZCopyPhysRegs.cpp
2222
SystemZElimCompare.cpp
23+
SystemZFinalizeRegMem.cpp
2324
SystemZFrameLowering.cpp
2425
SystemZHazardRecognizer.cpp
2526
SystemZISelDAGToDAG.cpp

llvm/lib/Target/SystemZ/SystemZ.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,12 +195,14 @@ FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
195195
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
196196
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
197197
FunctionPass *createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM);
198+
FunctionPass *createSystemZFinalizeRegMemPass(SystemZTargetMachine &TM);
198199
FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);
199200
FunctionPass *createSystemZTDCPass();
200201

201202
void initializeSystemZCopyPhysRegsPass(PassRegistry &);
202203
void initializeSystemZDAGToDAGISelPass(PassRegistry &);
203204
void initializeSystemZElimComparePass(PassRegistry &);
205+
void initializeSystemZFinalizeRegMemPass(PassRegistry &);
204206
void initializeSystemZLDCleanupPass(PassRegistry &);
205207
void initializeSystemZLongBranchPass(PassRegistry &);
206208
void initializeSystemZPostRewritePass(PassRegistry &);
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
//===------- SystemZFinalizeRegMem.cpp - Finalize FP reg/mem folding ------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass converts any remaining reg/reg pseudos into the real target
10+
// instruction in cases where the peephole optimizer did not fold a load into
11+
// a reg/mem instruction.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "SystemZMachineFunctionInfo.h"
16+
#include "SystemZTargetMachine.h"
17+
#include "llvm/CodeGen/MachineDominators.h"
18+
#include "llvm/CodeGen/MachineFunctionPass.h"
19+
#include "llvm/CodeGen/MachineInstrBuilder.h"
20+
#include "llvm/CodeGen/MachineRegisterInfo.h"
21+
#include "llvm/CodeGen/TargetInstrInfo.h"
22+
#include "llvm/CodeGen/TargetRegisterInfo.h"
23+
#include "llvm/Target/TargetMachine.h"
24+
25+
using namespace llvm;
26+
27+
namespace {
28+
29+
class SystemZFinalizeRegMem : public MachineFunctionPass {
30+
public:
31+
static char ID;
32+
SystemZFinalizeRegMem()
33+
: MachineFunctionPass(ID), TII(nullptr), MRI(nullptr) {
34+
initializeSystemZFinalizeRegMemPass(*PassRegistry::getPassRegistry());
35+
}
36+
37+
bool runOnMachineFunction(MachineFunction &MF) override;
38+
void getAnalysisUsage(AnalysisUsage &AU) const override;
39+
40+
private:
41+
42+
bool visitMBB(MachineBasicBlock &MBB);
43+
44+
const SystemZInstrInfo *TII;
45+
MachineRegisterInfo *MRI;
46+
};
47+
48+
char SystemZFinalizeRegMem::ID = 0;
49+
50+
} // end anonymous namespace
51+
52+
INITIALIZE_PASS(SystemZFinalizeRegMem, "systemz-finalize-regmem",
53+
"SystemZ Finalize RegMem", false, false)
54+
55+
/// Factory for the SystemZ reg/mem finalization pass. The caller receives a
/// freshly allocated pass object; \p TM is accepted for signature parity with
/// the other SystemZ pass factories but is not consulted here.
FunctionPass *
llvm::createSystemZFinalizeRegMemPass(SystemZTargetMachine &TM) {
  return new SystemZFinalizeRegMem();
}
59+
60+
/// Declare the analyses this pass relies on and preserves: it marks the CFG
/// as preserved and then defers to the MachineFunctionPass defaults.
void SystemZFinalizeRegMem::getAnalysisUsage(AnalysisUsage &AU) const {
  // Tell the pass manager that CFG-only analyses remain valid after this pass.
  AU.setPreservesCFG();

  MachineFunctionPass::getAnalysisUsage(AU);
}
64+
65+
/// Convert every remaining CC-clobbering reg/reg pseudo in \p MBB into the
/// corresponding real instruction.
///
/// For each WF{A,S}{D,S}B_CCPseudo the instruction descriptor is switched to
/// the real opcode and the CC register definition operand is dropped from the
/// instruction.
///
/// \returns true if at least one instruction in \p MBB was rewritten.
bool SystemZFinalizeRegMem::visitMBB(MachineBasicBlock &MBB) {
  bool Changed = false;
  for (MachineInstr &MI : MBB) {
    // Map the pseudo opcode to its real counterpart; anything else is left
    // untouched.
    unsigned TargetOpcode;
    switch (MI.getOpcode()) {
    case SystemZ::WFADB_CCPseudo:
      TargetOpcode = SystemZ::WFADB;
      break;
    case SystemZ::WFASB_CCPseudo:
      TargetOpcode = SystemZ::WFASB;
      break;
    case SystemZ::WFSDB_CCPseudo:
      TargetOpcode = SystemZ::WFSDB;
      break;
    case SystemZ::WFSSB_CCPseudo:
      TargetOpcode = SystemZ::WFSSB;
      break;
    default:
      continue;
    }

    MI.setDesc(TII->get(TargetOpcode));

    // findRegisterDefOperandIdx() returns -1 when the instruction has no CC
    // def. Passing that to removeOperand() would yield an out-of-range
    // operand index, so only remove the operand when it actually exists.
    const int CCIdx = MI.findRegisterDefOperandIdx(SystemZ::CC);
    if (CCIdx >= 0)
      MI.removeOperand(CCIdx);

    Changed = true;
  }
  return Changed;
}
84+
85+
/// Pass entry point: caches the subtarget's instruction info and the
/// function's register info, then processes each basic block in turn.
///
/// \returns true if any basic block of \p F was modified.
bool SystemZFinalizeRegMem::runOnMachineFunction(MachineFunction &F) {
  MRI = &F.getRegInfo();
  TII = F.getSubtarget<SystemZSubtarget>().getInstrInfo();

  bool Modified = false;
  for (MachineBasicBlock &MBB : F) {
    // Every block is visited regardless of earlier results.
    if (visitMBB(MBB))
      Modified = true;
  }
  return Modified;
}

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,10 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
692692
setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
693693
}
694694

695+
// Don't select reg/mem LDEB if WLDEB is available.
696+
if (Subtarget.hasVector())
697+
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
698+
695699
// Floating-point truncation and stores need to be done separately.
696700
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
697701
setTruncStoreAction(MVT::f128, MVT::f32, Expand);

llvm/lib/Target/SystemZ/SystemZInstrFP.td

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ let Predicates = [FeatureNoVectorEnhancements1] in {
201201
// Extend memory floating-point values to wider representations.
202202
let Uses = [FPC], mayRaiseFPException = 1 in {
203203
def LDEB : UnaryRXE<"ldeb", 0xED04, z_any_extloadf32, FP64, 4>;
204+
def LDEB : UnaryRXE<"ldeb", 0xED04, z_fpr_any_extloadf32, FP64, 4>;
204205
def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
205206
def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
206207
}
@@ -362,8 +363,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
362363
def SQDBR : UnaryRRE<"sqdbr", 0xB315, any_fsqrt, FP64, FP64>;
363364
def SQXBR : UnaryRRE<"sqxbr", 0xB316, any_fsqrt, FP128, FP128>;
364365

365-
def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<any_fsqrt>, FP32, 4>;
366-
def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<any_fsqrt>, FP64, 8>;
366+
def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<any_fsqrt, z_fprload>, FP32, 4>;
367+
def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<any_fsqrt, z_fprload>, FP64, 8>;
367368
}
368369

369370
// Round to an integer, with the second operand (modifier M3) specifying
@@ -432,6 +433,8 @@ let Uses = [FPC], mayRaiseFPException = 1,
432433
}
433434
defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, any_fadd, FP32, z_load, 4>;
434435
defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, any_fadd, FP64, z_load, 8>;
436+
defm AEB : BinaryRXEAndPseudo<"aeb", 0xED0A, any_fadd, FP32, z_fprload, 4>;
437+
defm ADB : BinaryRXEAndPseudo<"adb", 0xED1A, any_fadd, FP64, z_fprload, 8>;
435438
}
436439

437440
// Subtraction.
@@ -443,6 +446,8 @@ let Uses = [FPC], mayRaiseFPException = 1,
443446

444447
defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, any_fsub, FP32, z_load, 4>;
445448
defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, any_fsub, FP64, z_load, 8>;
449+
defm SEB : BinaryRXEAndPseudo<"seb", 0xED0B, any_fsub, FP32, z_fprload, 4>;
450+
defm SDB : BinaryRXEAndPseudo<"sdb", 0xED1B, any_fsub, FP64, z_fprload, 8>;
446451
}
447452

448453
// Multiplication.
@@ -454,6 +459,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
454459
}
455460
defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, any_fmul, FP32, z_load, 4>;
456461
defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, any_fmul, FP64, z_load, 8>;
462+
defm MEEB : BinaryRXEAndPseudo<"meeb", 0xED17, any_fmul, FP32, z_fprload, 4>;
463+
defm MDB : BinaryRXEAndPseudo<"mdb", 0xED1C, any_fmul, FP64, z_fprload, 8>;
457464
}
458465

459466
// f64 multiplication of two FP32 registers.
@@ -497,6 +504,10 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
497504

498505
defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32, z_load, 4>;
499506
defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64, z_load, 8>;
507+
defm MAEB : TernaryRXFAndPseudo<"maeb", 0xED0E, z_any_fma, FP32, FP32,
508+
z_fprload, 4>;
509+
defm MADB : TernaryRXFAndPseudo<"madb", 0xED1E, z_any_fma, FP64, FP64,
510+
z_fprload, 8>;
500511
}
501512

502513
// Fused multiply-subtract.
@@ -506,6 +517,10 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
506517

507518
defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32, z_load, 4>;
508519
defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64, z_load, 8>;
520+
defm MSEB : TernaryRXFAndPseudo<"mseb", 0xED0F, z_any_fms, FP32, FP32,
521+
z_fprload, 4>;
522+
defm MSDB : TernaryRXFAndPseudo<"msdb", 0xED1F, z_any_fms, FP64, FP64,
523+
z_fprload, 8>;
509524
}
510525

511526
// Division.
@@ -516,6 +531,8 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
516531

517532
defm DEB : BinaryRXEAndPseudo<"deb", 0xED0D, any_fdiv, FP32, z_load, 4>;
518533
defm DDB : BinaryRXEAndPseudo<"ddb", 0xED1D, any_fdiv, FP64, z_load, 8>;
534+
defm DEB : BinaryRXEAndPseudo<"deb", 0xED0D, any_fdiv, FP32, z_fprload, 4>;
535+
defm DDB : BinaryRXEAndPseudo<"ddb", 0xED1D, any_fdiv, FP64, z_fprload, 8>;
519536
}
520537

521538
// Divide to integer.
@@ -535,6 +552,8 @@ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in {
535552

536553
def CEB : CompareRXE<"ceb", 0xED09, z_any_fcmp, FP32, z_load, 4>;
537554
def CDB : CompareRXE<"cdb", 0xED19, z_any_fcmp, FP64, z_load, 8>;
555+
def CEB : CompareRXE<"ceb", 0xED09, z_any_fcmp, FP32, z_fprload, 4>;
556+
def CDB : CompareRXE<"cdb", 0xED19, z_any_fcmp, FP64, z_fprload, 8>;
538557

539558
def KEBR : CompareRRE<"kebr", 0xB308, z_strict_fcmps, FP32, FP32>;
540559
def KDBR : CompareRRE<"kdbr", 0xB318, z_strict_fcmps, FP64, FP64>;

llvm/lib/Target/SystemZ/SystemZInstrFormats.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5536,3 +5536,17 @@ multiclass StringRRE<string mnemonic, bits<16> opcode,
55365536
[(set GR64:$end, (operator GR64:$start1, GR64:$start2,
55375537
GR32:$char))]>;
55385538
}
5539+
5540+
// Defines a BinaryVRRc instruction together with a CC-clobbering pseudo.
//
// Two records are produced per instantiation:
//  * the unsuffixed record, a BinaryVRRc built with null_frag in place of
//    `operator`, so it carries no selection pattern of its own;
//  * the `_CCPseudo` record, a Pseudo with Defs = [CC] that takes two tr2
//    inputs ($V2, $V3), produces a tr1 result ($V1), and holds the
//    `operator` selection pattern.
multiclass BinaryVRRcAndCCPseudo<string mnemonic, bits<16> opcode,
5541+
SDPatternOperator operator,
5542+
TypedReg tr1, TypedReg tr2, bits<4> type = 0,
5543+
bits<4> m5 = 0, bits<4> m6 = 0,
5544+
string fp_mnemonic = ""> {
5545+
def "" : BinaryVRRc<mnemonic, opcode, null_frag, tr1, tr2, type, m5, m6,
5546+
fp_mnemonic>;
5547+
let Defs = [CC] in
5548+
def _CCPseudo : Pseudo<(outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3),
5549+
[(set (tr1.vt tr1.op:$V1),
5550+
(operator (tr2.vt tr2.op:$V2),
5551+
(tr2.vt tr2.op:$V3)))]>;
5552+
}

0 commit comments

Comments
 (0)