Skip to content

Commit 4b7f415

Browse files
wangleiattru
authored and committed
[LoongArch] Implement COPY instruction between CFRs (#69300)
With this patch, all CFRs can be used for register allocation. (cherry picked from commit 271087e)
1 parent fb62a20 commit 4b7f415

12 files changed

+227
-14
lines changed

llvm/lib/Target/LoongArch/LoongArch.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,11 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
3636
FunctionPass *createLoongArchExpandAtomicPseudoPass();
3737
FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM);
3838
FunctionPass *createLoongArchPreRAExpandPseudoPass();
39+
FunctionPass *createLoongArchExpandPseudoPass();
3940
void initializeLoongArchDAGToDAGISelPass(PassRegistry &);
4041
void initializeLoongArchExpandAtomicPseudoPass(PassRegistry &);
4142
void initializeLoongArchPreRAExpandPseudoPass(PassRegistry &);
43+
void initializeLoongArchExpandPseudoPass(PassRegistry &);
4244
} // end namespace llvm
4345

4446
#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H

llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ using namespace llvm;
2929

3030
#define LOONGARCH_PRERA_EXPAND_PSEUDO_NAME \
3131
"LoongArch Pre-RA pseudo instruction expansion pass"
32+
#define LOONGARCH_EXPAND_PSEUDO_NAME \
33+
"LoongArch pseudo instruction expansion pass"
3234

3335
namespace {
3436

@@ -513,15 +515,134 @@ bool LoongArchPreRAExpandPseudo::expandFunctionCALL(
513515
return true;
514516
}
515517

518+
// Machine function pass that expands the pseudo instructions handled by
// expandMI(); in this file that is PseudoCopyCFR, the CFR-to-CFR copy pseudo.
class LoongArchExpandPseudo : public MachineFunctionPass {
519+
public:
520+
// Target instruction info; (re)assigned at the start of every
// runOnMachineFunction() call and used to look up opcode descriptions.
const LoongArchInstrInfo *TII;
521+
// Pass identification; its address is what is passed to MachineFunctionPass.
static char ID;
522+
523+
// Registers the pass with the global PassRegistry on construction.
LoongArchExpandPseudo() : MachineFunctionPass(ID) {
524+
initializeLoongArchExpandPseudoPass(*PassRegistry::getPassRegistry());
525+
}
526+
527+
// Entry point: expands pseudos in every block of MF. Returns true if any
// instruction was changed.
bool runOnMachineFunction(MachineFunction &MF) override;
528+
529+
// Human-readable pass name (LOONGARCH_EXPAND_PSEUDO_NAME).
StringRef getPassName() const override {
530+
return LOONGARCH_EXPAND_PSEUDO_NAME;
531+
}
532+
533+
private:
534+
// Expands all pseudos in a single basic block.
bool expandMBB(MachineBasicBlock &MBB);
535+
// Dispatches on the opcode at MBBI; NextMBBI is an in/out parameter that the
// expansion may redirect when it restructures the block.
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
536+
MachineBasicBlock::iterator &NextMBBI);
537+
// Expands one PseudoCopyCFR into a set-false / conditional-branch / set-true
// sequence spanning newly created basic blocks.
bool expandCopyCFR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
538+
MachineBasicBlock::iterator &NextMBBI);
539+
};
540+
541+
char LoongArchExpandPseudo::ID = 0;
542+
543+
// Runs the expansion over every basic block of MF.
// Returns true if at least one block was modified.
bool LoongArchExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
543+
// Cache the instruction info for this function's subtarget; the expansion
// helpers read it through the TII member.
TII =
544+
static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo());
545+
546+
bool Modified = false;
547+
// NOTE(review): expandCopyCFR() inserts new blocks directly after the block
// being expanded, so this loop is expected to visit them as well and expand
// any pseudos that were moved into them - confirm.
for (auto &MBB : MF)
548+
Modified |= expandMBB(MBB);
549+
550+
return Modified;
551+
}
553+
554+
// Walks MBB from begin to end and gives every instruction to expandMI().
// Returns true if any instruction in the block was expanded.
bool LoongArchExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
555+
bool Modified = false;
556+
557+
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
558+
while (MBBI != E) {
559+
// Compute the successor up front: expandMI() may erase the instruction at
// MBBI, and may overwrite NMBBI (passed by reference) to tell this loop
// where to continue - e.g. MBB.end() once the tail of the block has been
// moved elsewhere.
MachineBasicBlock::iterator NMBBI = std::next(MBBI);
560+
Modified |= expandMI(MBB, MBBI, NMBBI);
561+
MBBI = NMBBI;
562+
}
563+
564+
return Modified;
565+
}
566+
567+
// Expands the instruction at MBBI if it is a pseudo this pass knows about.
//   MBB      - block containing the instruction.
//   MBBI     - instruction to inspect.
//   NextMBBI - in/out: where the caller should resume; forwarded to the
//              opcode-specific expansion, which may change it.
// Returns true if the instruction was expanded, false if it was left alone.
bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB,
567+
MachineBasicBlock::iterator MBBI,
568+
MachineBasicBlock::iterator &NextMBBI) {
569+
switch (MBBI->getOpcode()) {
570+
case LoongArch::PseudoCopyCFR:
571+
return expandCopyCFR(MBB, MBBI, NextMBBI);
572+
}
573+
574+
// Any other opcode is not handled by this pass.
return false;
575+
}
577+
578+
// Expands a PseudoCopyCFR ($dst = copy of $src) into real instructions. $dst
// is first set to false; a conditional branch on $src then either skips or
// falls into a block that sets $dst to true.
//   MBB      - block containing the pseudo; it is split at the pseudo.
//   MBBI     - iterator to the PseudoCopyCFR instruction.
//   NextMBBI - out: set to MBB.end(), because everything after the pseudo is
//              moved out of MBB into a new block.
// Always returns true (the function is modified).
bool LoongArchExpandPseudo::expandCopyCFR(
579+
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
580+
MachineBasicBlock::iterator &NextMBBI) {
581+
MachineFunction *MF = MBB.getParent();
582+
MachineInstr &MI = *MBBI;
583+
DebugLoc DL = MI.getDebugLoc();
584+
585+
// Expand:
586+
// MBB:
587+
// fcmp.caf.s $dst, $fa0, $fa0 # set $dst 0(false)
588+
// bceqz $src, SinkBB
589+
// FalseBB:
590+
// fcmp.cueq.s $dst, $fa0, $fa0 # set $dst 1(true)
591+
// SinkBB:
592+
// fallthrough
593+
//
// Note on naming: FalseBB is the fall-through of the bceqz, i.e. it is the
// block that overwrites $dst with true; it is skipped when the branch to
// SinkBB is taken.
594+
const BasicBlock *LLVM_BB = MBB.getBasicBlock();
595+
auto *FalseBB = MF->CreateMachineBasicBlock(LLVM_BB);
596+
auto *SinkBB = MF->CreateMachineBasicBlock(LLVM_BB);
597+
598+
// Lay the new blocks out immediately after MBB, in the order
// MBB -> FalseBB -> SinkBB.
MF->insert(++MBB.getIterator(), FalseBB);
599+
MF->insert(++FalseBB->getIterator(), SinkBB);
600+
601+
Register DestReg = MI.getOperand(0).getReg();
602+
Register SrcReg = MI.getOperand(1).getReg();
603+
// DestReg = 0
// NOTE(review): DestReg is written before SrcReg is tested below, so this
// sequence relies on DestReg != SrcReg - confirm that such a copy can never
// reach this point.
604+
BuildMI(MBB, MBBI, DL, TII->get(LoongArch::SET_CFR_FALSE), DestReg);
605+
// Insert branch instruction.
606+
BuildMI(MBB, MBBI, DL, TII->get(LoongArch::BCEQZ))
607+
.addReg(SrcReg)
608+
.addMBB(SinkBB);
609+
// DestReg = 1
610+
BuildMI(FalseBB, DL, TII->get(LoongArch::SET_CFR_TRUE), DestReg);
611+
612+
FalseBB->addSuccessor(SinkBB);
613+
614+
// Move the pseudo itself and every instruction after it into SinkBB, and
// hand MBB's original successors over to SinkBB.
SinkBB->splice(SinkBB->end(), &MBB, MI, MBB.end());
615+
SinkBB->transferSuccessors(&MBB);
616+
617+
// MBB now ends in the conditional branch: fall through to FalseBB or jump
// to SinkBB.
MBB.addSuccessor(FalseBB);
618+
MBB.addSuccessor(SinkBB);
619+
620+
// Nothing is left in MBB after the inserted branch, so the caller's walk of
// MBB stops here. The pseudo (now the first instruction of SinkBB after the
// splice) is then deleted.
NextMBBI = MBB.end();
621+
MI.eraseFromParent();
622+
623+
// Make sure live-ins are correctly attached to this new basic block.
// NOTE(review): FalseBB's only successor is SinkBB, and SinkBB has no
// live-ins recorded yet when FalseBB is processed first. If the live-in
// computation seeds from successor live-ins, SinkBB should probably be
// computed before FalseBB - verify against LivePhysRegs.
624+
LivePhysRegs LiveRegs;
625+
computeAndAddLiveIns(LiveRegs, *FalseBB);
626+
computeAndAddLiveIns(LiveRegs, *SinkBB);
627+
628+
return true;
629+
}
630+
516631
} // end namespace
517632

518633
INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo",
519634
LOONGARCH_PRERA_EXPAND_PSEUDO_NAME, false, false)
520635

636+
INITIALIZE_PASS(LoongArchExpandPseudo, "loongarch-expand-pseudo",
637+
LOONGARCH_EXPAND_PSEUDO_NAME, false, false)
638+
521639
namespace llvm {
522640

523641
// Factory: returns a newly allocated LoongArchPreRAExpandPseudo pass.
FunctionPass *createLoongArchPreRAExpandPseudoPass() {
524642
return new LoongArchPreRAExpandPseudo();
525643
}
644+
// Factory: returns a newly allocated LoongArchExpandPseudo pass (the pass that
// expands PseudoCopyCFR).
FunctionPass *createLoongArchExpandPseudoPass() {
645+
return new LoongArchExpandPseudo();
646+
}
526647

527648
} // end namespace llvm

llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,23 @@ def PseudoST_CFR : Pseudo<(outs),
126126
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
127127
def PseudoLD_CFR : Pseudo<(outs CFR:$ccd),
128128
(ins GPR:$rj, grlenimm:$imm)>;
129+
130+
// SET_CFR_{FALSE,TRUE}
131+
// These instructions are defined in order to avoid expensive check error if
132+
// regular instruction patterns are used.
133+
// fcmp.caf.s $dst, $fa0, $fa0
134+
def SET_CFR_FALSE : SET_CFR<0x0c100000, "fcmp.caf.s">;
135+
// fcmp.cueq.s $dst, $fa0, $fa0
136+
def SET_CFR_TRUE : SET_CFR<0x0c160000, "fcmp.cueq.s">;
137+
138+
// Pseudo instruction for copying CFRs.
139+
// Copies condition flag register $src into $dst. It is a pure register
// operation (no memory access, no side effects) that is expanded into real
// instructions by the LoongArch pseudo instruction expansion pass.
def PseudoCopyCFR : Pseudo<(outs CFR:$dst), (ins CFR:$src)> {
140+
let mayLoad = 0;
141+
let mayStore = 0;
142+
let hasSideEffects = 0;
143+
// The expansion emits three instructions (SET_CFR_FALSE, BCEQZ,
// SET_CFR_TRUE); presumably 4 bytes each, giving 12 - confirm.
let Size = 12;
144+
}
145+
129146
} // Predicates = [HasBasicF]
130147

131148
//===----------------------------------------------------------------------===//

llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,3 +218,15 @@ class FP_STORE_2RI12<bits<32> op, RegisterClass rc = FPR32>
218218
: FPFmt2RI12<op, (outs), (ins rc:$fd, GPR:$rj, simm12:$imm12),
219219
"$fd, $rj, $imm12">;
220220
} // hasSideEffects = 0, mayLoad = 0, mayStore = 1
221+
222+
// This class is used to define `SET_CFR_{FALSE,TRUE}` instructions which are
223+
// used to expand `PseudoCopyCFR`.
224+
// Parameters:
//   op     - 32-bit encoding value forwarded to FP_CMP.
//   opcstr - mnemonic printed in the assembly string.
class SET_CFR<bits<32> op, string opcstr>
225+
: FP_CMP<op> {
226+
// Only produced by code generation; not meant to be matched by the
// assembly parser.
let isCodeGenOnly = 1;
227+
let fj = 0; // fa0
228+
let fk = 0; // fa0
229+
// Both source operands are printed as the literal text "$fa0" ("$$" escapes
// the dollar sign); only $cd is a real operand.
let AsmString = opcstr # "\t$cd, $$fa0, $$fa0";
230+
let OutOperandList = (outs CFR:$cd);
231+
// No explicit inputs: the fj/fk fields are hard-wired to 0 above.
let InOperandList = (ins);
232+
}

llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
6161
.addReg(SrcReg, getKillRegState(KillSrc));
6262
return;
6363
}
64+
// CFR->CFR copy.
65+
if (LoongArch::CFRRegClass.contains(DstReg, SrcReg)) {
66+
BuildMI(MBB, MBBI, DL, get(LoongArch::PseudoCopyCFR), DstReg)
67+
.addReg(SrcReg, getKillRegState(KillSrc));
68+
return;
69+
}
6470

6571
// FPR->FPR copies.
6672
unsigned Opc;

llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,6 @@ LoongArchRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
9898
if (TFI->hasBP(MF))
9999
markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp
100100

101-
// FIXME: To avoid generating COPY instructions between CFRs, only use $fcc0.
102-
// This is required to work around the fact that COPY instruction between CFRs
103-
// is not provided in LoongArch.
104-
if (MF.getSubtarget<LoongArchSubtarget>().hasBasicF())
105-
for (size_t Reg = LoongArch::FCC1; Reg <= LoongArch::FCC7; ++Reg)
106-
markSuperRegs(Reserved, Reg);
107-
108101
assert(checkAllSuperRegsMarked(Reserved));
109102
return Reserved;
110103
}

llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ LoongArchTargetMachine::getTargetTransformInfo(const Function &F) const {
180180
void LoongArchPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
181181

182182
void LoongArchPassConfig::addPreEmitPass2() {
183+
addPass(createLoongArchExpandPseudoPass());
183184
// Schedule the expansion of AtomicPseudos at the last possible moment,
184185
// avoiding the possibility for other passes to break the requirements for
185186
// forward progress in the LL/SC block.

llvm/test/CodeGen/LoongArch/O0-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
7070
; CHECK-NEXT: Machine Optimization Remark Emitter
7171
; CHECK-NEXT: Stack Frame Layout Analysis
72+
; CHECK-NEXT: LoongArch pseudo instruction expansion pass
7273
; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass
7374
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
7475
; CHECK-NEXT: Machine Optimization Remark Emitter
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
# RUN: llc --mtriple=loongarch64 --mattr=+d %s -o - | FileCheck %s
3+
4+
## Check the PseudoCopyCFR instruction expand.
5+
6+
--- |
7+
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
8+
target triple = "loongarch64"
9+
10+
define void @test() {
11+
; CHECK-LABEL: test:
12+
; CHECK: # %bb.0:
13+
; CHECK-NEXT: fcmp.caf.s $fcc1, $fa0, $fa0
14+
; CHECK-NEXT: bceqz $fcc0, .LBB0_2
15+
; CHECK-NEXT: # %bb.1:
16+
; CHECK-NEXT: fcmp.cueq.s $fcc1, $fa0, $fa0
17+
; CHECK-NEXT: .LBB0_2:
18+
; CHECK-NEXT: movcf2gr $a0, $fcc1
19+
; CHECK-NEXT: ret
20+
ret void
21+
}
22+
...
23+
---
24+
name: test
25+
tracksRegLiveness: true
26+
body: |
27+
bb.0:
28+
liveins: $fcc0
29+
30+
$fcc1 = COPY $fcc0
31+
$r4 = COPY $fcc1
32+
PseudoRET implicit killed $r4
33+
34+
...
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc --mtriple=loongarch64 --mattr=+d --stop-after=postrapseudos %s \
3+
# RUN: -o - | FileCheck %s
4+
5+
## Check the COPY instruction between CFRs.
6+
## A pseudo (PseudoCopyCFR) is generated after postrapseudos pass.
7+
8+
...
9+
---
10+
name: test
11+
tracksRegLiveness: true
12+
body: |
13+
bb.0.entry:
14+
liveins: $fcc0
15+
16+
; CHECK-LABEL: name: test
17+
; CHECK: liveins: $fcc0
18+
; CHECK-NEXT: {{ $}}
19+
; CHECK-NEXT: $fcc1 = PseudoCopyCFR $fcc0
20+
; CHECK-NEXT: $r4 = MOVCF2GR killed $fcc1
21+
; CHECK-NEXT: PseudoRET implicit killed $r4
22+
$fcc1 = COPY $fcc0
23+
$r4 = COPY $fcc1
24+
PseudoRET implicit killed $r4
25+
26+
...

llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc --mtriple=loongarch64 --mattr=+d --run-pass=greedy %s -o - | FileCheck %s
2+
# RUN: llc --mtriple=loongarch64 --mattr=+d --regalloc=fast \
3+
# RUN: --stop-before=postra-machine-sink %s -o - | FileCheck %s
34

45
## Check that fcc register clobbered by inlineasm is correctly saved by examining
56
## a pair of pseudos (PseudoST_CFR and PseudoLD_CFR) are generated before and
@@ -15,13 +16,11 @@ body: |
1516
; CHECK-LABEL: name: test
1617
; CHECK: liveins: $f0_64, $f1_64
1718
; CHECK-NEXT: {{ $}}
18-
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f1_64
19-
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f0_64
20-
; CHECK-NEXT: [[FCMP_CLT_D:%[0-9]+]]:cfr = FCMP_CLT_D [[COPY]], [[COPY1]]
21-
; CHECK-NEXT: PseudoST_CFR [[FCMP_CLT_D]], %stack.0, 0 :: (store (s64) into %stack.0)
19+
; CHECK-NEXT: renamable $fcc0 = FCMP_CLT_D renamable $f1_64, renamable $f0_64
20+
; CHECK-NEXT: PseudoST_CFR $fcc0, %stack.0, 0 :: (store (s64) into %stack.0)
2221
; CHECK-NEXT: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $fcc0
23-
; CHECK-NEXT: [[PseudoLD_CFR:%[0-9]+]]:cfr = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0)
24-
; CHECK-NEXT: $r4 = COPY [[PseudoLD_CFR]]
22+
; CHECK-NEXT: $fcc0 = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0)
23+
; CHECK-NEXT: $r4 = COPY killed renamable $fcc0
2524
; CHECK-NEXT: PseudoRET implicit killed $r4
2625
%1:fpr64 = COPY $f1_64
2726
%0:fpr64 = COPY $f0_64

llvm/test/CodeGen/LoongArch/opt-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@
165165
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
166166
; CHECK-NEXT: Machine Optimization Remark Emitter
167167
; CHECK-NEXT: Stack Frame Layout Analysis
168+
; CHECK-NEXT: LoongArch pseudo instruction expansion pass
168169
; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass
169170
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
170171
; CHECK-NEXT: Machine Optimization Remark Emitter

0 commit comments

Comments
 (0)