Skip to content

Commit 0c85389

Browse files
committed
buildvector pattern in True16
1 parent 89001d1 commit 0c85389

File tree

4 files changed

+3790
-3795
lines changed

4 files changed

+3790
-3795
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -782,9 +782,22 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
782782
return true;
783783

784784
// TODO: This should probably be a combine somewhere
785-
// (build_vector $src0, undef) -> copy $src0
786785
MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI);
787786
if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
787+
if (Subtarget->useRealTrue16Insts() && IsVector) {
788+
// (vecTy (DivergentBinFrag<build_vector> Ty:$src0, (Ty undef))),
789+
// -> (vecTy (INSERT_SUBREG (IMPLICIT_DEF), VGPR_16:$src0, lo16))
790+
Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
791+
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
792+
BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::INSERT_SUBREG), Dst)
793+
.addReg(Undef)
794+
.addReg(Src0)
795+
.addImm(AMDGPU::lo16);
796+
MI.eraseFromParent();
797+
return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) &&
798+
RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_16RegClass, *MRI);
799+
}
800+
// (build_vector $src0, undef) -> copy $src0
788801
MI.setDesc(TII.get(AMDGPU::COPY));
789802
MI.removeOperand(2);
790803
const auto &RC =

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3346,6 +3346,8 @@ def : GCNPat <
33463346
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
33473347
>;
33483348

3349+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3350+
let True16Predicate = p in {
33493351
def : GCNPat <
33503352
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$src0), (Ty undef))),
33513353
(COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
@@ -3355,6 +3357,7 @@ def : GCNPat <
33553357
(vecTy (UniformBinFrag<build_vector> (Ty undef), (Ty SReg_32:$src1))),
33563358
(S_LSHL_B32 SReg_32:$src1, (i32 16))
33573359
>;
3360+
}
33583361

33593362
def : GCNPat <
33603363
(vecTy (DivergentBinFrag<build_vector> (Ty undef), (Ty VGPR_32:$src1))),
@@ -3364,6 +3367,8 @@ def : GCNPat <
33643367
}
33653368

33663369
let SubtargetPredicate = HasVOP3PInsts in {
3370+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3371+
let True16Predicate = p in
33673372
def : GCNPat <
33683373
(v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 VGPR_32:$src1))),
33693374
(v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0))))
@@ -3393,12 +3398,25 @@ def : GCNPat <
33933398
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
33943399
>;
33953400

3401+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3402+
let True16Predicate = p in
33963403
// Take the lower 16 bits from each VGPR_32 and concatenate them
33973404
def : GCNPat <
33983405
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$a), (Ty VGPR_32:$b))),
33993406
(V_PERM_B32_e64 VGPR_32:$b, VGPR_32:$a, (S_MOV_B32 (i32 0x05040100)))
34003407
>;
34013408

3409+
let True16Predicate = UseRealTrue16Insts in {
3410+
def : GCNPat <
3411+
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_16:$a), (Ty VGPR_16:$b))),
3412+
(REG_SEQUENCE VGPR_32, VGPR_16:$a, lo16, VGPR_16:$b, hi16)
3413+
>;
3414+
// GlobalISel ignores this pattern; the equivalent selection is done in AMDGPUInstructionSelector::selectG_BUILD_VECTOR
3415+
def : GCNPat <
3416+
(vecTy (build_vector (Ty VGPR_16:$src0), (Ty undef))),
3417+
(REG_SEQUENCE VGPR_32, $src0, lo16, (IMPLICIT_DEF), hi16)
3418+
>;
3419+
}
34023420

34033421
// Take the lower 16 bits from V[0] and the upper 16 bits from V[1]
34043422
// Special case, can use V_BFI (0xffff literal likely more reusable than 0x70601000)
@@ -3424,6 +3442,8 @@ def : GCNPat <
34243442

34253443
// Take the upper 16 bits from V[0] and the lower 16 bits from V[1]
34263444
// Special case, can use V_ALIGNBIT (always uses encoded literal)
3445+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3446+
let True16Predicate = p in
34273447
def : GCNPat <
34283448
(vecTy (DivergentBinFrag<build_vector>
34293449
(Ty !if(!eq(Ty, i16),

0 commit comments

Comments
 (0)