Skip to content

[LoongArch] Reimplement to prevent Pseudo{CALL, LA*}_LARGE instruction reordering #100099

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
645 changes: 309 additions & 336 deletions llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp

Large diffs are not rendered by default.

77 changes: 77 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,83 @@ bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
}
}

/// Decide whether \p MI has to be treated as a scheduling boundary.
///
/// The target-independent answer is consulted first; on top of that, the
/// instructions that open or close one of the fixed relocation sequences
/// described below are reported as boundaries.
bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                              const MachineBasicBlock *MBB,
                                              const MachineFunction &MF) const {
  // Whatever the generic hook already considers a boundary stays a boundary.
  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
    return true;

  // psABI v2.30 (https://github.com/loongson/la-abi-specs/releases/tag/v2.30)
  // forbids reordering the instructions of the following sequences:
  //
  //   * pcaddu18i $ra, %call36(s)
  //     jirl      $ra, $ra, 0
  //
  //   * pcalau12i $a0, %pc_hi20(s)
  //     addi.d    $a1, $zero, %pc_lo12(s)
  //     lu32i.d   $a1, %pc64_lo20(s)
  //     lu52i.d   $a1, $a1, %pc64_hi12(s)
  //
  //   * pcalau12i $a0, %got_pc_hi20(s) | %ld_pc_hi20(s) | %gd_pc_hi20(s)
  //     addi.d    $a1, $zero, %got_pc_lo12(s)
  //     lu32i.d   $a1, %got64_pc_lo20(s)
  //     lu52i.d   $a1, $a1, %got64_pc_hi12(s)
  //
  //   * pcalau12i $a0, %ie_pc_hi20(s)
  //     addi.d    $a1, $zero, %ie_pc_lo12(s)
  //     lu32i.d   $a1, %ie64_pc_lo20(s)
  //     lu52i.d   $a1, $a1, %ie64_pc_hi12(s)
  //
  // To keep things simple only the first (pcalau12i) and last (lu52i.d)
  // instruction of a four-instruction sequence are flagged; the instructions
  // in between are kept in order by their data dependencies.
  const unsigned Opcode = MI.getOpcode();

  // Head of the call36 pair.
  if (Opcode == LoongArch::PCADDU18I)
    return MI.getOperand(1).getTargetFlags() == LoongArchII::MO_CALL36;

  // Tail of any of the 64-bit pc-relative sequences.
  if (Opcode == LoongArch::LU52I_D) {
    const auto HighFlag = MI.getOperand(2).getTargetFlags();
    return HighFlag == LoongArchII::MO_PCREL64_HI ||
           HighFlag == LoongArchII::MO_GOT_PC64_HI ||
           HighFlag == LoongArchII::MO_IE_PC64_HI;
  }

  if (Opcode != LoongArch::PCALAU12I)
    return false;

  // Head of a 64-bit sequence: the next two instructions in the block must be
  // addi.d followed by lu32i.d, otherwise this pcalau12i is not part of one.
  auto BlockEnd = MBB->end();
  auto Second = std::next(MI.getIterator());
  if (Second == BlockEnd || Second->getOpcode() != LoongArch::ADDI_D)
    return false;
  auto Third = std::next(Second);
  if (Third == BlockEnd || Third->getOpcode() != LoongArch::LU32I_D)
    return false;

  // The three relocation flags must belong to the same family.
  const auto HiFlag = MI.getOperand(1).getTargetFlags();
  const auto LoFlag = Second->getOperand(2).getTargetFlags();
  const auto Lo64Flag = Third->getOperand(2).getTargetFlags();

  switch (HiFlag) {
  case LoongArchII::MO_PCREL_HI:
    return LoFlag == LoongArchII::MO_PCREL_LO &&
           Lo64Flag == LoongArchII::MO_PCREL64_LO;
  case LoongArchII::MO_GOT_PC_HI:
  case LoongArchII::MO_LD_PC_HI:
  case LoongArchII::MO_GD_PC_HI:
    return LoFlag == LoongArchII::MO_GOT_PC_LO &&
           Lo64Flag == LoongArchII::MO_GOT_PC64_LO;
  case LoongArchII::MO_IE_PC_HI:
    return LoFlag == LoongArchII::MO_IE_PC_LO &&
           Lo64Flag == LoongArchII::MO_IE_PC64_LO;
  default:
    return false;
  }
}

unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
if (BytesRemoved)
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo {
bool isBranchOffsetInRange(unsigned BranchOpc,
int64_t BrOffset) const override;

/// Returns true if \p MI, located in \p MBB of function \p MF, must be
/// treated as a scheduling boundary. Overrides the base-class hook.
bool isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const override;

unsigned removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved = nullptr) const override;

Expand Down
20 changes: 7 additions & 13 deletions llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1478,7 +1478,7 @@ def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>;
def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>;

// Function call with 'Medium' code model.
let isCall = 1, Defs = [R1, R20], Size = 8 in
let isCall = 1, Defs = [R1] in
def PseudoCALL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$func)>;

let Predicates = [IsLA64] in {
Expand All @@ -1489,7 +1489,7 @@ def : Pat<(loongarch_call_medium texternalsym:$func),
} // Predicates = [IsLA64]

// Function call with 'Large' code model.
let isCall = 1, Defs = [R1, R20], Size = 24 in
let isCall = 1, Defs = [R1] in
def PseudoCALL_LARGE: Pseudo<(outs), (ins bare_symbol:$func)>;

let Predicates = [IsLA64] in {
Expand Down Expand Up @@ -1527,8 +1527,7 @@ def : Pat<(loongarch_tail (iPTR texternalsym:$dst)),
(PseudoTAIL texternalsym:$dst)>;

// Tail call with 'Medium' code model.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
Uses = [R3], Defs = [R20], Size = 8 in
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
def PseudoTAIL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$dst)>;

let Predicates = [IsLA64] in {
Expand All @@ -1539,8 +1538,7 @@ def : Pat<(loongarch_tail_medium (iPTR texternalsym:$dst)),
} // Predicates = [IsLA64]

// Tail call with 'Large' code model.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
Uses = [R3], Defs = [R19, R20], Size = 24 in
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
def PseudoTAIL_LARGE : Pseudo<(outs), (ins bare_symbol:$dst)>;

let Predicates = [IsLA64] in {
Expand Down Expand Up @@ -1572,12 +1570,12 @@ def PseudoJIRL_TAIL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>,

/// call36/tail36 macro instructions
let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, isAsmParserOnly = 1,
Defs = [R1], Size = 8, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in
Defs = [R1], hasSideEffects = 0, mayStore = 0, mayLoad = 0 in
def PseudoCALL36 : Pseudo<(outs), (ins bare_symbol:$dst), [],
"call36", "$dst">,
Requires<[IsLA64]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3],
isCodeGenOnly = 0, isAsmParserOnly = 1, Size = 8, hasSideEffects = 0,
isCodeGenOnly = 0, isAsmParserOnly = 1, hasSideEffects = 0,
mayStore = 0, mayLoad = 0 in
def PseudoTAIL36 : Pseudo<(outs), (ins GPR:$tmp, bare_symbol:$dst), [],
"tail36", "$tmp, $dst">,
Expand Down Expand Up @@ -1614,7 +1612,6 @@ def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.ld", "$dst, $src">;
def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.gd", "$dst, $src">;
let Defs = [R20], Size = 20 in {
def PseudoLA_PCREL_LARGE : Pseudo<(outs GPR:$dst),
(ins GPR:$tmp, bare_symbol:$src), [],
"la.pcrel", "$dst, $tmp, $src">,
Expand All @@ -1629,15 +1626,13 @@ def PseudoLA_TLS_GD_LARGE : Pseudo<(outs GPR:$dst),
(ins GPR:$tmp, bare_symbol:$src), [],
"la.tls.gd", "$dst, $tmp, $src">,
Requires<[IsLA64]>;
} // Defs = [R20], Size = 20
}
let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
isAsmParserOnly = 1 in {
def PseudoLA_GOT : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.got", "$dst, $src">;
def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.ie", "$dst, $src">;
let Defs = [R20], Size = 20 in {
def PseudoLA_GOT_LARGE : Pseudo<(outs GPR:$dst),
(ins GPR:$tmp, bare_symbol:$src), [],
"la.got", "$dst, $tmp, $src">,
Expand All @@ -1646,7 +1641,6 @@ def PseudoLA_TLS_IE_LARGE : Pseudo<(outs GPR:$dst),
(ins GPR:$tmp, bare_symbol:$src), [],
"la.tls.ie", "$dst, $tmp, $src">,
Requires<[IsLA64]>;
} // Defs = [R20], Size = 20
}

// Used for expand PseudoLA_TLS_DESC_* instructions.
Expand All @@ -1671,7 +1665,7 @@ def PseudoLA_TLS_DESC_PC : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
}

let isCall = 1, isBarrier = 1, hasSideEffects = 0, mayStore = 0, mayLoad = 0,
isCodeGenOnly = 0, isAsmParserOnly = 1, Defs = [R1, R4, R20], Size = 32 in
isCodeGenOnly = 0, isAsmParserOnly = 1, Defs = [R1, R4] in
def PseudoLA_TLS_DESC_PC_LARGE : Pseudo<(outs GPR:$dst),
(ins GPR:$tmp, bare_symbol:$src), [],
"la.tls.desc", "$dst, $tmp, $src">,
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/LoongArch/code-models.ll
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ define i32 @call_globaladdress(i32 %a) nounwind {
; LARGE: # %bb.0:
; LARGE-NEXT: addi.d $sp, $sp, -16
; LARGE-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; LARGE-NEXT: pcalau12i $ra, %got_pc_hi20(callee)
; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(callee)
; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(callee)
; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(callee)
; LARGE-NEXT: ldx.d $ra, $t8, $ra
; LARGE-NEXT: pcalau12i $a1, %got_pc_hi20(callee)
; LARGE-NEXT: addi.d $ra, $zero, %got_pc_lo12(callee)
; LARGE-NEXT: lu32i.d $ra, %got64_pc_lo20(callee)
; LARGE-NEXT: lu52i.d $ra, $ra, %got64_pc_hi12(callee)
; LARGE-NEXT: ldx.d $ra, $ra, $a1
; LARGE-NEXT: jirl $ra, $ra, 0
; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LARGE-NEXT: addi.d $sp, $sp, 16
Expand Down Expand Up @@ -82,11 +82,11 @@ define void @call_external_sym(ptr %dst) {
; LARGE-NEXT: .cfi_offset 1, -8
; LARGE-NEXT: ori $a2, $zero, 1000
; LARGE-NEXT: move $a1, $zero
; LARGE-NEXT: pcalau12i $ra, %pc_hi20(memset)
; LARGE-NEXT: addi.d $t8, $zero, %pc_lo12(memset)
; LARGE-NEXT: lu32i.d $t8, %pc64_lo20(memset)
; LARGE-NEXT: lu52i.d $t8, $t8, %pc64_hi12(memset)
; LARGE-NEXT: add.d $ra, $t8, $ra
; LARGE-NEXT: pcalau12i $a3, %pc_hi20(memset)
; LARGE-NEXT: addi.d $ra, $zero, %pc_lo12(memset)
; LARGE-NEXT: lu32i.d $ra, %pc64_lo20(memset)
; LARGE-NEXT: lu52i.d $ra, $ra, %pc64_hi12(memset)
; LARGE-NEXT: add.d $ra, $ra, $a3
; LARGE-NEXT: jirl $ra, $ra, 0
; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LARGE-NEXT: addi.d $sp, $sp, 16
Expand All @@ -105,17 +105,17 @@ define i32 @caller_tail(i32 %i) nounwind {
;
; MEDIUM-LABEL: caller_tail:
; MEDIUM: # %bb.0: # %entry
; MEDIUM-NEXT: pcaddu18i $t8, %call36(callee_tail)
; MEDIUM-NEXT: jr $t8
; MEDIUM-NEXT: pcaddu18i $a1, %call36(callee_tail)
; MEDIUM-NEXT: jr $a1
;
; LARGE-LABEL: caller_tail:
; LARGE: # %bb.0: # %entry
; LARGE-NEXT: pcalau12i $t7, %got_pc_hi20(callee_tail)
; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(callee_tail)
; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(callee_tail)
; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(callee_tail)
; LARGE-NEXT: ldx.d $t7, $t8, $t7
; LARGE-NEXT: jr $t7
; LARGE-NEXT: pcalau12i $a1, %got_pc_hi20(callee_tail)
; LARGE-NEXT: addi.d $a2, $zero, %got_pc_lo12(callee_tail)
; LARGE-NEXT: lu32i.d $a2, %got64_pc_lo20(callee_tail)
; LARGE-NEXT: lu52i.d $a2, $a2, %got64_pc_hi12(callee_tail)
; LARGE-NEXT: ldx.d $a1, $a2, $a1
; LARGE-NEXT: jr $a1
entry:
%r = tail call i32 @callee_tail(i32 %i)
ret i32 %r
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/LoongArch/expand-call.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; RUN: llc --mtriple=loongarch64 -mattr=+d --stop-before loongarch-prera-expand-pseudo \
; RUN: --verify-machineinstrs < %s | FileCheck %s --check-prefix=NOEXPAND
; RUN: llc --mtriple=loongarch64 -mattr=+d --stop-before machine-opt-remark-emitter \
; RUN: llc --mtriple=loongarch64 --stop-after loongarch-prera-expand-pseudo \
; RUN: --verify-machineinstrs < %s | FileCheck %s --check-prefix=EXPAND

declare void @callee()
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/LoongArch/global-address.ll
Original file line number Diff line number Diff line change
Expand Up @@ -53,32 +53,32 @@ define void @foo() nounwind {
; LA64LARGENOPIC-LABEL: foo:
; LA64LARGENOPIC: # %bb.0:
; LA64LARGENOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(G)
; LA64LARGENOPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G)
; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G)
; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0
; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(G)
; LA64LARGENOPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(G)
; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G)
; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0
; LA64LARGENOPIC-NEXT: ld.w $zero, $a0, 0
; LA64LARGENOPIC-NEXT: pcalau12i $a0, %pc_hi20(g)
; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %pc_lo12(g)
; LA64LARGENOPIC-NEXT: lu32i.d $t8, %pc64_lo20(g)
; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g)
; LA64LARGENOPIC-NEXT: add.d $a0, $t8, $a0
; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %pc_lo12(g)
; LA64LARGENOPIC-NEXT: lu32i.d $a1, %pc64_lo20(g)
; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g)
; LA64LARGENOPIC-NEXT: add.d $a0, $a1, $a0
; LA64LARGENOPIC-NEXT: ld.w $zero, $a0, 0
; LA64LARGENOPIC-NEXT: ret
;
; LA64LARGEPIC-LABEL: foo:
; LA64LARGEPIC: # %bb.0:
; LA64LARGEPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(G)
; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G)
; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G)
; LA64LARGEPIC-NEXT: ldx.d $a0, $t8, $a0
; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(G)
; LA64LARGEPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(G)
; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G)
; LA64LARGEPIC-NEXT: ldx.d $a0, $a1, $a0
; LA64LARGEPIC-NEXT: ld.w $zero, $a0, 0
; LA64LARGEPIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local)
; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(.Lg$local)
; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(.Lg$local)
; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(.Lg$local)
; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0
; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %pc_lo12(.Lg$local)
; LA64LARGEPIC-NEXT: lu32i.d $a1, %pc64_lo20(.Lg$local)
; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %pc64_hi12(.Lg$local)
; LA64LARGEPIC-NEXT: add.d $a0, $a1, $a0
; LA64LARGEPIC-NEXT: ld.w $zero, $a0, 0
; LA64LARGEPIC-NEXT: ret
%V = load volatile i32, ptr @G
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/LoongArch/global-variable-code-model.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ define dso_local signext i32 @local_large() #0 {
; CHECK-LABEL: local_large:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(b)
; CHECK-NEXT: addi.d $t8, $zero, %pc_lo12(b)
; CHECK-NEXT: lu32i.d $t8, %pc64_lo20(b)
; CHECK-NEXT: lu52i.d $t8, $t8, %pc64_hi12(b)
; CHECK-NEXT: add.d $a0, $t8, $a0
; CHECK-NEXT: addi.d $a1, $zero, %pc_lo12(b)
; CHECK-NEXT: lu32i.d $a1, %pc64_lo20(b)
; CHECK-NEXT: lu52i.d $a1, $a1, %pc64_hi12(b)
; CHECK-NEXT: add.d $a0, $a1, $a0
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: ret
%1 = load i32, ptr @b, align 4
Expand Down
Loading
Loading