
[RISCV] Add a rematerializable pseudo instruction for LUI+ADDI for global addresses. #93352

Merged · 3 commits · May 28, 2024
20 changes: 20 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1311,6 +1311,26 @@ def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),

/// HI and ADD_LO address nodes.

// Pseudo for a rematerializable LUI+ADDI sequence for loading an address.
// It will be expanded after register allocation.
// FIXME: The scheduling information does not reflect the multiple instructions.
let Size = 8, isReMaterializable = 1 in
def PseudoMovAddr : Pseudo<(outs GPR:$dst), (ins uimm20_lui:$hi, simm12:$lo), []>,
Sched<[WriteIALU]>;

def riscv_hi_oneuse : unop_oneuse<riscv_hi>;
def addr_hi_lo : PatFrag<(ops node:$hi, node:$lo),
(riscv_add_lo (riscv_hi_oneuse node:$hi), node:$lo)>;

def : Pat<(addr_hi_lo tglobaladdr:$hi, tglobaladdr:$lo),
(PseudoMovAddr tglobaladdr:$hi, tglobaladdr:$lo)>;
def : Pat<(addr_hi_lo tblockaddress:$hi, tblockaddress:$lo),
(PseudoMovAddr tblockaddress:$hi, tblockaddress:$lo)>;
def : Pat<(addr_hi_lo tjumptable:$hi, tjumptable:$lo),
(PseudoMovAddr tjumptable:$hi, tjumptable:$lo)>;
def : Pat<(addr_hi_lo tconstpool:$hi, tconstpool:$lo),
(PseudoMovAddr tconstpool:$hi, tconstpool:$lo)>;

def : Pat<(riscv_hi tglobaladdr:$in), (LUI tglobaladdr:$in)>;
def : Pat<(riscv_hi tblockaddress:$in), (LUI tblockaddress:$in)>;
def : Pat<(riscv_hi tjumptable:$in), (LUI tjumptable:$in)>;
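For illustration, a minimal LLVM IR sketch (hypothetical global `g`, not taken from this PR's tests) of what these patterns buy: materializing a global's address now selects a single PseudoMovAddr, which remains rematerializable through register allocation and is only split back into LUI+ADDI afterwards. The `riscv_hi_oneuse` guard keeps the combined pattern from firing when the LUI result has other users.

```llvm
; Hypothetical example (not from this PR's tests); compile with:
;   llc -mtriple=riscv32 addr.ll
@g = dso_local global i32 0

define ptr @take_addr() {
  ; ISel matches (riscv_add_lo (riscv_hi g), g) and emits PseudoMovAddr g.
  ; After register allocation the pseudo is expanded to:
  ;   lui  a0, %hi(g)
  ;   addi a0, a0, %lo(g)
  ret ptr @g
}
```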
35 changes: 27 additions & 8 deletions llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -84,7 +84,8 @@ INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
// 3) The offset value in the Global Address or Constant Pool is 0.
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
MachineInstr *&Lo) {
if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC)
if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC &&
Hi.getOpcode() != RISCV::PseudoMovAddr)
return false;

const MachineOperand &HiOp1 = Hi.getOperand(1);
@@ -97,16 +98,22 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
HiOp1.getOffset() != 0)
return false;

Register HiDestReg = Hi.getOperand(0).getReg();
if (!MRI->hasOneUse(HiDestReg))
return false;
if (Hi.getOpcode() == RISCV::PseudoMovAddr) {
// Most of the code below handles this case without modification: point
// both Lo and Hi at the same PseudoMovAddr.
Lo = &Hi;
} else {
Register HiDestReg = Hi.getOperand(0).getReg();
if (!MRI->hasOneUse(HiDestReg))
return false;

Lo = &*MRI->use_instr_begin(HiDestReg);
if (Lo->getOpcode() != RISCV::ADDI)
return false;
Lo = &*MRI->use_instr_begin(HiDestReg);
if (Lo->getOpcode() != RISCV::ADDI)
return false;
}

const MachineOperand &LoOp2 = Lo->getOperand(2);
if (Hi.getOpcode() == RISCV::LUI) {
if (Hi.getOpcode() == RISCV::LUI || Hi.getOpcode() == RISCV::PseudoMovAddr) {
if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
!(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
LoOp2.getOffset() != 0)
@@ -466,6 +473,13 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,

Hi.getOperand(1).setOffset(NewOffset);
MachineOperand &ImmOp = Lo.getOperand(2);
// Rewrite PseudoMovAddr into a plain LUI and drop its low-half operand.
if (Hi.getOpcode() == RISCV::PseudoMovAddr) {
auto *TII = ST->getInstrInfo();
Hi.setDesc(TII->get(RISCV::LUI));
Hi.removeOperand(2);
}

if (Hi.getOpcode() != RISCV::AUIPC)
ImmOp.setOffset(NewOffset);

@@ -501,6 +515,11 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
}
}

// Prevent erasing Lo when it is the original PseudoMovAddr, in which case
// Hi points to the same instruction.
if (&Lo == &Hi)
return true;

MRI->replaceRegWith(Lo.getOperand(0).getReg(), Hi.getOperand(0).getReg());
Lo.eraseFromParent();
return true;
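As a hedged sketch of the case the new code handles (hypothetical global, assuming the usual medlow lowering): when Hi is a PseudoMovAddr, Lo and Hi name the same instruction, so the pass folds the constant offset into that one pseudo, or demotes it to a plain LUI when the low half is folded into the memory operands, and must not erase it afterwards.

```llvm
; Hypothetical example; compile with: llc -mtriple=riscv32 fold.ll
@buf = dso_local global [16 x i32] zeroinitializer

define i32 @load_elem() {
  ; The +8-byte offset (element 2) is folded by RISCVMergeBaseOffset,
  ; giving roughly:
  ;   lui a0, %hi(buf+8)
  ;   lw  a0, %lo(buf+8)(a0)
  %p = getelementptr inbounds [16 x i32], ptr @buf, i32 0, i32 2
  %v = load i32, ptr %p
  ret i32 %v
}
```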
23 changes: 23 additions & 0 deletions llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
@@ -44,6 +44,7 @@ class RISCVPostRAExpandPseudo : public MachineFunctionPass {
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandMovAddr(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
};

char RISCVPostRAExpandPseudo::ID = 0;
@@ -75,6 +76,8 @@ bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
switch (MBBI->getOpcode()) {
case RISCV::PseudoMovImm:
return expandMovImm(MBB, MBBI);
case RISCV::PseudoMovAddr:
return expandMovAddr(MBB, MBBI);
default:
return false;
}
@@ -101,6 +104,26 @@ bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB,
return true;
}

bool RISCVPostRAExpandPseudo::expandMovAddr(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
DebugLoc DL = MBBI->getDebugLoc();

Register DstReg = MBBI->getOperand(0).getReg();
bool DstIsDead = MBBI->getOperand(0).isDead();
bool Renamable = MBBI->getOperand(0).isRenamable();

BuildMI(MBB, MBBI, DL, TII->get(RISCV::LUI))
.addReg(DstReg, RegState::Define | getRenamableRegState(Renamable))
.add(MBBI->getOperand(1));
BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead) |
getRenamableRegState(Renamable))
.addReg(DstReg, RegState::Kill | getRenamableRegState(Renamable))
.add(MBBI->getOperand(2));
MBBI->eraseFromParent();
return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32",
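The point of marking the pseudo rematerializable shows up in the test updates below. A hypothetical reduction, not from this PR: when a materialized address must survive a call or clobber, the allocator can recompute the LUI+ADDI pair on the spot instead of pinning the pointer in a callee-saved register or spilling it.

```llvm
; Hypothetical example; compile with: llc -mtriple=riscv32 remat.ll
@g = dso_local global i32 0

declare void @use(ptr)

define void @twice() nounwind {
  ; The address of @g is needed before each call. Because PseudoMovAddr is
  ; rematerializable, the allocator may re-emit
  ;   lui a0, %hi(g); addi a0, a0, %lo(g)
  ; for the second call instead of preserving the pointer in a
  ; callee-saved register across the first (exact output depends on
  ; allocator heuristics).
  call void @use(ptr @g)
  call void @use(ptr @g)
  ret void
}
```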
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -383,8 +383,8 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
; RV32I-NEXT: neg a0, s2
; RV32I-NEXT: and a0, s2, a0
; RV32I-NEXT: mv a1, s3
@@ -442,28 +442,28 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
; RV32M-LABEL: test_cttz_i64:
; RV32M: # %bb.0:
; RV32M-NEXT: lui a2, 30667
; RV32M-NEXT: addi a2, a2, 1329
; RV32M-NEXT: lui a3, %hi(.LCPI3_0)
; RV32M-NEXT: addi a3, a3, %lo(.LCPI3_0)
; RV32M-NEXT: addi a3, a2, 1329
; RV32M-NEXT: lui a2, %hi(.LCPI3_0)
; RV32M-NEXT: addi a2, a2, %lo(.LCPI3_0)
; RV32M-NEXT: bnez a1, .LBB3_3
; RV32M-NEXT: # %bb.1:
; RV32M-NEXT: li a1, 32
; RV32M-NEXT: beqz a0, .LBB3_4
; RV32M-NEXT: .LBB3_2:
; RV32M-NEXT: neg a1, a0
; RV32M-NEXT: and a0, a0, a1
; RV32M-NEXT: mul a0, a0, a2
; RV32M-NEXT: mul a0, a0, a3
; RV32M-NEXT: srli a0, a0, 27
; RV32M-NEXT: add a0, a3, a0
; RV32M-NEXT: add a0, a2, a0
; RV32M-NEXT: lbu a0, 0(a0)
; RV32M-NEXT: li a1, 0
; RV32M-NEXT: ret
; RV32M-NEXT: .LBB3_3:
; RV32M-NEXT: neg a4, a1
; RV32M-NEXT: and a1, a1, a4
; RV32M-NEXT: mul a1, a1, a2
; RV32M-NEXT: mul a1, a1, a3
; RV32M-NEXT: srli a1, a1, 27
; RV32M-NEXT: add a1, a3, a1
; RV32M-NEXT: add a1, a2, a1
; RV32M-NEXT: lbu a1, 0(a1)
; RV32M-NEXT: bnez a0, .LBB3_2
; RV32M-NEXT: .LBB3_4:
@@ -814,8 +814,8 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lui a0, %hi(.LCPI7_0)
; RV32I-NEXT: addi s4, a0, %lo(.LCPI7_0)
; RV32I-NEXT: lui s4, %hi(.LCPI7_0)
; RV32I-NEXT: addi s4, s4, %lo(.LCPI7_0)
; RV32I-NEXT: neg a0, s1
; RV32I-NEXT: and a0, s1, a0
; RV32I-NEXT: mv a1, s3
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
@@ -48,8 +48,8 @@ define signext i32 @ctz_dereferencing_pointer(ptr %b) nounwind {
; RV32I-NEXT: mv a1, s1
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s0, a0
; RV32I-NEXT: lui a0, %hi(.LCPI0_0)
; RV32I-NEXT: addi s3, a0, %lo(.LCPI0_0)
; RV32I-NEXT: lui s3, %hi(.LCPI0_0)
; RV32I-NEXT: addi s3, s3, %lo(.LCPI0_0)
; RV32I-NEXT: neg a0, s4
; RV32I-NEXT: and a0, s4, a0
; RV32I-NEXT: mv a1, s1
@@ -511,8 +511,8 @@ define signext i32 @ctz4(i64 %b) nounwind {
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, %hi(.LCPI6_0)
; RV32I-NEXT: addi s4, a0, %lo(.LCPI6_0)
; RV32I-NEXT: lui s4, %hi(.LCPI6_0)
; RV32I-NEXT: addi s4, s4, %lo(.LCPI6_0)
; RV32I-NEXT: neg a0, s2
; RV32I-NEXT: and a0, s2, a0
; RV32I-NEXT: mv a1, s3
@@ -24,31 +24,31 @@ define void @_Z3foov() {
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49)
; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v10, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46)
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: vle16.v v10, (a0)
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45)
; CHECK-NEXT: vle16.v v14, (a0)
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs2r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40)
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
@@ -389,8 +389,8 @@ define dso_local i32 @load_ga() local_unnamed_addr #0 {
define dso_local i64 @load_ga_8() nounwind {
; RV32I-LABEL: load_ga_8:
; RV32I: # %bb.0: # %entry
; RV32I-NEXT: lui a0, %hi(ga_8)
; RV32I-NEXT: addi a1, a0, %lo(ga_8)
; RV32I-NEXT: lui a1, %hi(ga_8)
; RV32I-NEXT: addi a1, a1, %lo(ga_8)
; RV32I-NEXT: lw a0, 8(a1)
; RV32I-NEXT: lw a1, 12(a1)
; RV32I-NEXT: ret
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -209,8 +209,8 @@ define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
; RV32I-NEXT: neg a0, s2
; RV32I-NEXT: and a0, s2, a0
; RV32I-NEXT: mv a1, s3
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -199,8 +199,8 @@ define i64 @cttz_i64(i64 %a) nounwind {
; RV32I-NEXT: mv a1, s3
; RV32I-NEXT: call __mulsi3
; RV32I-NEXT: mv s1, a0
; RV32I-NEXT: lui a0, %hi(.LCPI3_0)
; RV32I-NEXT: addi s4, a0, %lo(.LCPI3_0)
; RV32I-NEXT: lui s4, %hi(.LCPI3_0)
; RV32I-NEXT: addi s4, s4, %lo(.LCPI3_0)
; RV32I-NEXT: neg a0, s2
; RV32I-NEXT: and a0, s2, a0
; RV32I-NEXT: mv a1, s3
40 changes: 20 additions & 20 deletions llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
@@ -126,28 +126,28 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vsaddu.vx v8, v8, a1
; CHECK-NEXT: vmsltu.vx v0, v8, a2
; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v16, (a0)
; CHECK-NEXT: vmsltu.vx v0, v8, a2
; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsext.vf8 v24, v16
; CHECK-NEXT: vsaddu.vx v16, v24, a1
; CHECK-NEXT: vmsltu.vx v9, v16, a2
; CHECK-NEXT: vsext.vf8 v16, v8
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v8, v16, a2
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: lui a0, %hi(.LCPI9_2)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmsltu.vx v10, v16, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v8, 2
; CHECK-NEXT: vslideup.vi v0, v9, 2
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v10, 4
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsext.vf8 v16, v8
; CHECK-NEXT: vsaddu.vx v8, v16, a1
; CHECK-NEXT: vmsltu.vx v16, v8, a2
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
@@ -169,13 +169,13 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vsext.vf8 v16, v8
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v10, v16, a2
; CHECK-NEXT: vmsltu.vx v8, v16, a2
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v8, v16, a2
; CHECK-NEXT: lui a0, %hi(.LCPI10_2)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2)
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vmsltu.vx v10, v16, a2
; CHECK-NEXT: lui a0, %hi(.LCPI10_3)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3)
; CHECK-NEXT: vle8.v v11, (a0)
@@ -187,10 +187,10 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
; CHECK-NEXT: vmsltu.vx v11, v16, a2
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v0, v16, a2
; CHECK-NEXT: lui a0, %hi(.LCPI10_4)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4)
; CHECK-NEXT: vle8.v v12, (a0)
; CHECK-NEXT: vmsltu.vx v0, v16, a2
; CHECK-NEXT: lui a0, %hi(.LCPI10_5)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5)
; CHECK-NEXT: vle8.v v13, (a0)
@@ -201,27 +201,27 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v13, v16, a2
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vslideup.vi v10, v8, 2
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 4
; CHECK-NEXT: vslideup.vi v10, v9, 4
; CHECK-NEXT: lui a0, %hi(.LCPI10_6)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6)
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v11, 6
; CHECK-NEXT: vslideup.vi v10, v11, 6
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v12, 2
; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT: vslideup.vi v0, v13, 4
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vsext.vf8 v16, v9
; CHECK-NEXT: vsext.vf8 v16, v8
; CHECK-NEXT: vsaddu.vx v16, v16, a1
; CHECK-NEXT: vmsltu.vx v9, v16, a2
; CHECK-NEXT: vmsltu.vx v8, v16, a2
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v0, v9, 6
; CHECK-NEXT: vslideup.vi v0, v8, 6
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vslideup.vi v0, v8, 8
; CHECK-NEXT: vslideup.vi v0, v10, 8
; CHECK-NEXT: ret
%mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc)
ret <128 x i1> %mask