Skip to content

Commit f2cbd1f

Browse files
committed
[LoongArch] Add codegen support for insertelement
1 parent cfe1ece commit f2cbd1f

File tree

6 files changed

+570
-2
lines changed

6 files changed

+570
-2
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
246246
// FIXME: For BUILD_VECTOR, it is temporarily set to `Legal` here, and it
247247
// will be `Custom` handled in the future.
248248
setOperationAction(ISD::BUILD_VECTOR, VT, Legal);
249-
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
249+
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
250250
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
251251
}
252252
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
@@ -276,7 +276,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
276276

277277
// FIXME: Same as above.
278278
setOperationAction(ISD::BUILD_VECTOR, VT, Legal);
279-
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
279+
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
280280
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
281281
}
282282
for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
@@ -380,10 +380,20 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
380380
return lowerRETURNADDR(Op, DAG);
381381
case ISD::WRITE_REGISTER:
382382
return lowerWRITE_REGISTER(Op, DAG);
383+
case ISD::INSERT_VECTOR_ELT:
384+
return lowerINSERT_VECTOR_ELT(Op, DAG);
383385
}
384386
return SDValue();
385387
}
386388

389+
// Custom lowering hook for ISD::INSERT_VECTOR_ELT nodes; LowerOperation
// dispatches to it for that opcode.
//
// Returns Op unchanged when operand 2 of the node is a ConstantSDNode, and an
// empty SDValue for every other kind of operand 2. DAG is not used.
//
// NOTE(review): operand 2 is presumably the insertion index, and the empty
// SDValue presumably hands variable-index inserts back to the generic
// legalizer for default expansion -- confirm against the SelectionDAG docs.
SDValue
390+
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
391+
SelectionDAG &DAG) const {
392+
// Constant operand 2: keep the node as it is.
if (isa<ConstantSDNode>(Op->getOperand(2)))
393+
return Op;
394+
// Anything else: report that no custom lowering was produced.
return SDValue();
395+
}
396+
387397
SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
388398
SelectionDAG &DAG) const {
389399
SDLoc DL(Op);
@@ -3067,6 +3077,71 @@ emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
30673077
return SinkBB;
30683078
}
30693079

3080+
// Expands PseudoXVINSGR2VR_B / PseudoXVINSGR2VR_H: insert the register Elt at
// immediate position Idx of the 256-bit (LASX256) vector XSrc, producing XDst.
//
// The expansion is built around the 128-bit VINSGR2VR_B / VINSGR2VR_H
// instructions: a 128-bit sub-register is copied out, the element is inserted
// there, and the result is widened back to 256 bits. XVPERMI_Q instructions
// are added around that sequence when Idx >= HalfSize.
//
// All new instructions are inserted into BB immediately before MI; MI is then
// erased and BB is returned. Any opcode other than the two pseudos hits
// llvm_unreachable.
static MachineBasicBlock *
3081+
emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
3082+
const LoongArchSubtarget &Subtarget) {
3083+
// 128-bit insert opcode used for the actual element insertion.
unsigned InsOp;
3084+
// Threshold on Idx: indices >= HalfSize take the XVPERMI_Q path below.
// Presumably the number of elements in one 128-bit half (16 bytes or
// 8 halfwords) -- TODO confirm.
unsigned HalfSize;
3085+
switch (MI.getOpcode()) {
3086+
default:
3087+
llvm_unreachable("Unexpected opcode");
3088+
case LoongArch::PseudoXVINSGR2VR_B:
3089+
HalfSize = 16;
3090+
InsOp = LoongArch::VINSGR2VR_B;
3091+
break;
3092+
case LoongArch::PseudoXVINSGR2VR_H:
3093+
HalfSize = 8;
3094+
InsOp = LoongArch::VINSGR2VR_H;
3095+
break;
3096+
}
3097+
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3098+
// RC: register class for 256-bit temporaries; SubRC: for 128-bit temporaries.
const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
3099+
const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
3100+
DebugLoc DL = MI.getDebugLoc();
3101+
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3102+
// XDst = vector_insert XSrc, Elt, Idx
3103+
Register XDst = MI.getOperand(0).getReg();
3104+
Register XSrc = MI.getOperand(1).getReg();
3105+
Register Elt = MI.getOperand(2).getReg();
3106+
unsigned Idx = MI.getOperand(3).getImm();
3107+
3108+
// Step 1: choose the 256-bit register whose sub_128 part will be edited.
// For Idx < HalfSize this is XSrc itself. Otherwise it is a fresh register
// defined by XVPERMI_Q XSrc, XSrc, 1 -- presumably this moves the upper
// 128 bits of XSrc into the sub_128 position; confirm against the LASX
// instruction reference.
Register ScratchReg1 = XSrc;
3109+
if (Idx >= HalfSize) {
3110+
ScratchReg1 = MRI.createVirtualRegister(RC);
3111+
BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
3112+
.addReg(XSrc)
3113+
.addReg(XSrc)
3114+
.addImm(1);
3115+
}
3116+
3117+
// Step 2: copy the sub_128 sub-register into a 128-bit virtual register and
// insert Elt there. The index is rebased by HalfSize when Idx >= HalfSize so
// that it addresses a position inside the 128-bit value.
Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
3118+
Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
3119+
BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
3120+
.addReg(ScratchReg1, 0, LoongArch::sub_128);
3121+
BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
3122+
.addReg(ScratchSubReg1)
3123+
.addReg(Elt)
3124+
.addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
3125+
3126+
// Step 3: widen the 128-bit result back to a 256-bit register with
// SUBREG_TO_REG. For Idx < HalfSize the widened value is written straight
// into XDst and is the final result; otherwise it goes to a temporary that
// step 4 consumes.
// NOTE(review): on the Idx < HalfSize path XDst is defined only from the
// 128-bit result; no instruction emitted here explicitly carries the
// remaining 128 bits of XSrc into XDst. Verify that the upper half of the
// vector is preserved as vector_insert requires.
Register ScratchReg2 = XDst;
3127+
if (Idx >= HalfSize)
3128+
ScratchReg2 = MRI.createVirtualRegister(RC);
3129+
3130+
BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
3131+
.addImm(0)
3132+
.addReg(ScratchSubReg2)
3133+
.addImm(LoongArch::sub_128);
3134+
3135+
// Step 4 (Idx >= HalfSize only): produce XDst with XVPERMI_Q from XSrc and
// the widened result, immediate 2 -- presumably this keeps the low 128 bits
// of XSrc and places the edited 128 bits in the upper half; confirm against
// the LASX instruction reference.
if (Idx >= HalfSize)
3136+
BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
3137+
.addReg(XSrc)
3138+
.addReg(ScratchReg2)
3139+
.addImm(2);
3140+
3141+
// The pseudo has been fully replaced by the sequence above.
MI.eraseFromParent();
3142+
return BB;
3143+
}
3144+
30703145
MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
30713146
MachineInstr &MI, MachineBasicBlock *BB) const {
30723147
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -3122,6 +3197,9 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
31223197
case LoongArch::PseudoXVBNZ_W:
31233198
case LoongArch::PseudoXVBNZ_D:
31243199
return emitVecCondBranchPseudo(MI, BB, Subtarget);
3200+
case LoongArch::PseudoXVINSGR2VR_B:
3201+
case LoongArch::PseudoXVINSGR2VR_H:
3202+
return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
31253203
}
31263204
}
31273205

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ class LoongArchTargetLowering : public TargetLowering {
276276
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
277277
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
278278
SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const;
279+
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
279280

280281
bool isFPImmLegal(const APFloat &Imm, EVT VT,
281282
bool ForCodeSize) const override;

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,6 +1065,13 @@ def PseudoXVBZ_W : VecCond<loongarch_vall_zero, v8i32, LASX256>;
10651065
def PseudoXVBZ_D : VecCond<loongarch_vall_zero, v4i64, LASX256>;
10661066
def PseudoXVBZ : VecCond<loongarch_vany_zero, v32i8, LASX256>;
10671067

1068+
// Pseudo instructions that insert a GPR value ($rj) at immediate position
// $imm of a LASX256 vector ($xd), yielding $dst. The _B variant takes a uimm5
// position and the _H variant a uimm4 position.
// usesCustomInserter = 1 marks them for expansion in C++ (the
// EmitInstrWithCustomInserter switch sends both opcodes to
// emitPseudoXVINSGR2VR), and the constraint ties the input vector $xd to the
// result $dst.
let usesCustomInserter = 1, Constraints = "$xd = $dst" in {
1069+
def PseudoXVINSGR2VR_B
1070+
: Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm5:$imm)>;
1071+
def PseudoXVINSGR2VR_H
1072+
: Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm4:$imm)>;
1073+
} // usesCustomInserter = 1, Constraints = "$xd = $dst"
1074+
10681075
} // Predicates = [HasExtLASX]
10691076

10701077
multiclass PatXr<SDPatternOperator OpNode, string Inst> {
@@ -1365,12 +1372,23 @@ def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),
13651372
def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa),
13661373
(XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
13671374

1375+
// PseudoXVINSGR2VR_{B/H}
// Select vector_insert of a GRLenVT scalar into v32i8 / v16i16 at an
// immediate position (uimm5 / uimm4) to the corresponding pseudo
// instruction, passing the vector, scalar and position through unchanged.
1376+
def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
1377+
(PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>;
1378+
def : Pat<(vector_insert v16i16:$xd, GRLenVT:$rj, uimm4:$imm),
1379+
(PseudoXVINSGR2VR_H v16i16:$xd, GRLenVT:$rj, uimm4:$imm)>;
1380+
13681381
// XVINSGR2VR_{W/D}
13691382
def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm),
13701383
(XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>;
13711384
def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm),
13721385
(XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>;
13731386

1387+
// Floating-point element inserts into v8f32 / v4f64 at an immediate position.
// These reuse the integer XVINSGR2VR_{W/D} instructions: the FPR32 / FPR64
// scalar is first placed in the GPR register class via COPY_TO_REGCLASS and
// then inserted like an integer element.
// NOTE(review): this relies on a copy between the FPR and GPR register
// classes being available for the target -- confirm.
def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm),
1388+
(XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>;
1389+
def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm),
1390+
(XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;
1391+
13741392
// XVPICKVE2GR_W[U]
13751393
def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32),
13761394
(XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>;

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1462,6 +1462,11 @@ def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm),
14621462
def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm),
14631463
(VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>;
14641464

1465+
// Floating-point element inserts into v4f32 / v2f64 at an immediate position.
// These reuse the integer VINSGR2VR_{W/D} instructions: the FPR32 / FPR64
// scalar is first placed in the GPR register class via COPY_TO_REGCLASS and
// then inserted like an integer element.
// NOTE(review): this relies on a copy between the FPR and GPR register
// classes being available for the target -- confirm.
def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm),
1466+
(VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>;
1467+
def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm),
1468+
(VINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm1:$imm)>;
1469+
14651470
// VPICKVE2GR_{B/H/W}[U]
14661471
def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8),
14671472
(VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>;

0 commit comments

Comments
 (0)