Skip to content

Commit fd13edf

Browse files
committed
buildvector pattern in True16
1 parent 9ba438d commit fd13edf

File tree

4 files changed

+3790
-3795
lines changed

4 files changed

+3790
-3795
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -782,9 +782,22 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(MachineInstr &MI) const {
782782
return true;
783783

784784
// TODO: This should probably be a combine somewhere
785-
// (build_vector $src0, undef) -> copy $src0
786785
MachineInstr *Src1Def = getDefIgnoringCopies(Src1, *MRI);
787786
if (Src1Def->getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
787+
if (Subtarget->useRealTrue16Insts() && IsVector) {
788+
// (vecTy (DivergentBinFrag<build_vector> Ty:$src0, (Ty undef)))
789+
// -> (vecTy (INSERT_SUBREG (IMPLICIT_DEF), VGPR_16:$src0, lo16))
790+
Register Undef = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
791+
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
792+
BuildMI(*BB, &MI, DL, TII.get(TargetOpcode::INSERT_SUBREG), Dst)
793+
.addReg(Undef)
794+
.addReg(Src0)
795+
.addImm(AMDGPU::lo16);
796+
MI.eraseFromParent();
797+
return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) &&
798+
RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_16RegClass, *MRI);
799+
}
800+
// (build_vector $src0, undef) -> copy $src0
788801
MI.setDesc(TII.get(AMDGPU::COPY));
789802
MI.removeOperand(2);
790803
const auto &RC =

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3359,6 +3359,8 @@ def : GCNPat <
33593359
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
33603360
>;
33613361

3362+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3363+
let True16Predicate = p in {
33623364
def : GCNPat <
33633365
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$src0), (Ty undef))),
33643366
(COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
@@ -3368,6 +3370,7 @@ def : GCNPat <
33683370
(vecTy (UniformBinFrag<build_vector> (Ty undef), (Ty SReg_32:$src1))),
33693371
(S_LSHL_B32 SReg_32:$src1, (i32 16))
33703372
>;
3373+
}
33713374

33723375
def : GCNPat <
33733376
(vecTy (DivergentBinFrag<build_vector> (Ty undef), (Ty VGPR_32:$src1))),
@@ -3377,6 +3380,8 @@ def : GCNPat <
33773380
}
33783381

33793382
let SubtargetPredicate = HasVOP3PInsts in {
3383+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3384+
let True16Predicate = p in
33803385
def : GCNPat <
33813386
(v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 VGPR_32:$src1))),
33823387
(v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0))))
@@ -3406,12 +3411,25 @@ def : GCNPat <
34063411
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
34073412
>;
34083413

3414+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3415+
let True16Predicate = p in
34093416
// Take the lower 16 bits from each VGPR_32 and concat them
34103417
def : GCNPat <
34113418
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$a), (Ty VGPR_32:$b))),
34123419
(V_PERM_B32_e64 VGPR_32:$b, VGPR_32:$a, (S_MOV_B32 (i32 0x05040100)))
34133420
>;
34143421

3422+
let True16Predicate = UseRealTrue16Insts in {
3423+
def : GCNPat <
3424+
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_16:$a), (Ty VGPR_16:$b))),
3425+
(REG_SEQUENCE VGPR_32, VGPR_16:$a, lo16, VGPR_16:$b, hi16)
3426+
>;
3427+
// GISel ignores this Pat, but the equivalent is done in selectG_BUILD_VECTOR
3428+
def : GCNPat <
3429+
(vecTy (build_vector (Ty VGPR_16:$src0), (Ty undef))),
3430+
(REG_SEQUENCE VGPR_32, $src0, lo16, (IMPLICIT_DEF), hi16)
3431+
>;
3432+
}
34153433

34163434
// Take the lower 16 bits from V[0] and the upper 16 bits from V[1]
34173435
// Special case, can use V_BFI (0xffff literal likely more reusable than 0x07060100)
@@ -3437,6 +3455,8 @@ def : GCNPat <
34373455

34383456
// Take the upper 16 bits from V[0] and the lower 16 bits from V[1]
34393457
// Special case, can use V_ALIGNBIT (always uses encoded literal)
3458+
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
3459+
let True16Predicate = p in
34403460
def : GCNPat <
34413461
(vecTy (DivergentBinFrag<build_vector>
34423462
(Ty !if(!eq(Ty, i16),

0 commit comments

Comments
 (0)