Skip to content

Commit 6d7d7f1

Browse files
committed
[GlobalISel] Refactor extractParts()
Try to use unmerge when doing irregular splits where possible, falling back to extract elements when not.
1 parent c4f29ff commit 6d7d7f1

File tree

4 files changed

+96
-65
lines changed

4 files changed

+96
-65
lines changed

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,47 @@ bool llvm::extractParts(Register Reg, LLT RegTy, LLT MainTy, LLT &LeftoverTy,
506506
return true;
507507
}
508508

509+
// Try to use unmerge for irregular vector split where possible
510+
if (RegTy.isVector() && MainTy.isVector()) {
511+
unsigned RegNumElts = RegTy.getNumElements();
512+
unsigned MainNumElts = MainTy.getNumElements();
513+
unsigned LeftoverNumElts = RegNumElts % MainNumElts;
514+
// If Reg can be unmerged into LeftoverTy pieces, do so
515+
if (MainNumElts % LeftoverNumElts == 0 &&
516+
RegNumElts % LeftoverNumElts == 0 &&
517+
RegTy.getScalarSizeInBits() == MainTy.getScalarSizeInBits() &&
518+
LeftoverNumElts > 1) {
519+
LeftoverTy =
520+
LLT::fixed_vector(LeftoverNumElts, RegTy.getScalarSizeInBits());
521+
522+
// Unmerge Reg into LeftoverTy vectors
523+
SmallVector<Register, 4> UnmergeValues;
524+
extractParts(Reg, LeftoverTy, RegNumElts / LeftoverNumElts, UnmergeValues,
525+
MIRBuilder, MRI);
526+
527+
// Find how many LeftoverTy pieces make up one MainTy
528+
unsigned LeftoverPerMain = MainNumElts / LeftoverNumElts;
529+
unsigned NumOfLeftoverVal =
530+
((RegNumElts % MainNumElts) / LeftoverNumElts);
531+
532+
// Create as many MainTy values as possible from the unmerged values
533+
SmallVector<Register, 4> MergeValues;
534+
for (unsigned I = 0; I < UnmergeValues.size() - NumOfLeftoverVal; I++) {
535+
MergeValues.push_back(UnmergeValues[I]);
536+
if (MergeValues.size() == LeftoverPerMain) {
537+
VRegs.push_back(
538+
MIRBuilder.buildMergeLikeInstr(MainTy, MergeValues).getReg(0));
539+
MergeValues.clear();
540+
}
541+
}
542+
// Populate LeftoverRegs with the leftovers
543+
for (unsigned I = UnmergeValues.size() - NumOfLeftoverVal;
544+
I < UnmergeValues.size(); I++) {
545+
LeftoverRegs.push_back(UnmergeValues[I]);
546+
}
547+
return true;
548+
}
549+
}
509550
// Perform irregular split. Leftover is last element of RegPieces.
510551
if (MainTy.isVector()) {
511552
SmallVector<Register, 8> RegPieces;

llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp

Lines changed: 33 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -336,53 +336,45 @@ void applyExtAddvToUdotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
336336
SmallVector<Register, 4> Ext1UnmergeReg;
337337
SmallVector<Register, 4> Ext2UnmergeReg;
338338
if (SrcTy.getNumElements() % 16 != 0) {
339-
// Unmerge source to v8i8, append a new v8i8 of 0s and the merge to v16s
340-
SmallVector<Register, 4> PadUnmergeDstReg1;
341-
SmallVector<Register, 4> PadUnmergeDstReg2;
342-
unsigned NumOfVec = SrcTy.getNumElements() / 8;
343-
344-
// Unmerge the source to v8i8
345-
MachineInstr *PadUnmerge1 =
346-
Builder.buildUnmerge(LLT::fixed_vector(8, 8), Ext1SrcReg);
347-
MachineInstr *PadUnmerge2 =
348-
Builder.buildUnmerge(LLT::fixed_vector(8, 8), Ext2SrcReg);
349-
for (unsigned i = 0; i < NumOfVec; i++) {
350-
PadUnmergeDstReg1.push_back(PadUnmerge1->getOperand(i).getReg());
351-
PadUnmergeDstReg2.push_back(PadUnmerge2->getOperand(i).getReg());
339+
SmallVector<Register, 1> Leftover1;
340+
SmallVector<Register, 1> Leftover2;
341+
342+
// Split the elements into v16i8 and v8i8
343+
LLT MainTy = LLT::fixed_vector(16, 8);
344+
LLT LeftoverTy1, LeftoverTy2;
345+
if ((!extractParts(Ext1SrcReg, MRI.getType(Ext1SrcReg), MainTy,
346+
LeftoverTy1, Ext1UnmergeReg, Leftover1, Builder,
347+
MRI)) ||
348+
(!extractParts(Ext2SrcReg, MRI.getType(Ext2SrcReg), MainTy,
349+
LeftoverTy2, Ext2UnmergeReg, Leftover2, Builder,
350+
MRI))) {
351+
llvm_unreachable("Unable to split this vector properly");
352352
}
353353

354-
// Pad the vectors with a v8i8 constant of 0s
354+
// Pad the leftover v8i8 vector with a v8i8 register of zeros
355355
MachineInstr *v8Zeroes =
356356
Builder.buildConstant(LLT::fixed_vector(8, 8), 0);
357-
PadUnmergeDstReg1.push_back(v8Zeroes->getOperand(0).getReg());
358-
PadUnmergeDstReg2.push_back(v8Zeroes->getOperand(0).getReg());
359-
360-
// Merge them all back to v16i8
361-
NumOfVec = (NumOfVec + 1) / 2;
362-
for (unsigned i = 0; i < NumOfVec; i++) {
363-
Ext1UnmergeReg.push_back(
364-
Builder
365-
.buildMergeLikeInstr(
366-
LLT::fixed_vector(16, 8),
367-
{PadUnmergeDstReg1[i * 2], PadUnmergeDstReg1[(i * 2) + 1]})
368-
.getReg(0));
369-
Ext2UnmergeReg.push_back(
370-
Builder
371-
.buildMergeLikeInstr(
372-
LLT::fixed_vector(16, 8),
373-
{PadUnmergeDstReg2[i * 2], PadUnmergeDstReg2[(i * 2) + 1]})
374-
.getReg(0));
375-
}
357+
Leftover1.push_back(v8Zeroes->getOperand(0).getReg());
358+
Leftover2.push_back(v8Zeroes->getOperand(0).getReg());
359+
360+
Ext1UnmergeReg.push_back(
361+
Builder
362+
.buildMergeLikeInstr(LLT::fixed_vector(16, 8),
363+
{Leftover1[0], Leftover1[1]})
364+
.getReg(0));
365+
Ext2UnmergeReg.push_back(
366+
Builder
367+
.buildMergeLikeInstr(LLT::fixed_vector(16, 8),
368+
{Leftover2[0], Leftover2[1]})
369+
.getReg(0));
370+
376371
} else {
377372
// Unmerge the source vectors to v16i8
378-
MachineInstr *Ext1Unmerge =
379-
Builder.buildUnmerge(LLT::fixed_vector(16, 8), Ext1SrcReg);
380-
MachineInstr *Ext2Unmerge =
381-
Builder.buildUnmerge(LLT::fixed_vector(16, 8), Ext2SrcReg);
382-
for (unsigned i = 0, e = SrcTy.getNumElements() / 16; i < e; i++) {
383-
Ext1UnmergeReg.push_back(Ext1Unmerge->getOperand(i).getReg());
384-
Ext2UnmergeReg.push_back(Ext2Unmerge->getOperand(i).getReg());
385-
}
373+
unsigned SrcNumElts = SrcTy.getNumElements();
374+
extractParts(Ext1SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
375+
Ext1UnmergeReg, Builder, MRI);
376+
extractParts(Ext2SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
377+
Ext2UnmergeReg, Builder, MRI);
386378
}
387379

388380
// Build the UDOT instructions

llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -460,12 +460,14 @@ body: |
460460
; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C3]](s64)
461461
; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF1]](<4 x s32>), [[C]](s64)
462462
; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF1]](<4 x s32>), [[C1]](s64)
463-
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32)
464-
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EVEC4]](s32), [[EVEC5]](s32)
465-
; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY8]](p0) :: (store (<4 x s32>), align 32)
463+
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32)
464+
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EVEC2]](s32), [[EVEC3]](s32)
465+
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EVEC4]](s32), [[EVEC5]](s32)
466+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR2]](<2 x s32>), [[BUILD_VECTOR3]](<2 x s32>)
467+
; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY8]](p0) :: (store (<4 x s32>), align 32)
466468
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
467469
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C4]](s64)
468-
; CHECK-NEXT: G_STORE [[BUILD_VECTOR3]](<2 x s32>), [[PTR_ADD]](p0) :: (store (<2 x s32>) into unknown-address + 16, align 16)
470+
; CHECK-NEXT: G_STORE [[BUILD_VECTOR4]](<2 x s32>), [[PTR_ADD]](p0) :: (store (<2 x s32>) into unknown-address + 16, align 16)
469471
; CHECK-NEXT: RET_ReallyLR
470472
%3:_(s32) = COPY $s0
471473
%4:_(s32) = COPY $s1

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6828,55 +6828,51 @@ body: |
68286828
; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
68296829
; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
68306830
; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
6831-
; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
6832-
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
6833-
; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
6834-
; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
6831+
; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
6832+
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV1]](<2 x s32>)
6833+
; SI-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
68356834
; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
68366835
; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
6837-
; SI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
6836+
; SI-NEXT: G_STORE [[UV2]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
68386837
;
68396838
; CI-LABEL: name: test_store_global_v12s16_align4
68406839
; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
68416840
; CI-NEXT: {{ $}}
68426841
; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
68436842
; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
68446843
; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
6845-
; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
6846-
; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
6847-
; CI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
6848-
; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
6844+
; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
6845+
; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV1]](<2 x s32>)
6846+
; CI-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
68496847
; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
68506848
; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
6851-
; CI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
6849+
; CI-NEXT: G_STORE [[UV2]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
68526850
;
68536851
; VI-LABEL: name: test_store_global_v12s16_align4
68546852
; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
68556853
; VI-NEXT: {{ $}}
68566854
; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
68576855
; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
68586856
; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
6859-
; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
6860-
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
6861-
; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
6862-
; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
6857+
; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
6858+
; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV1]](<2 x s32>)
6859+
; VI-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
68636860
; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
68646861
; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
6865-
; VI-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
6862+
; VI-NEXT: G_STORE [[UV2]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
68666863
;
68676864
; GFX9-LABEL: name: test_store_global_v12s16_align4
68686865
; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
68696866
; GFX9-NEXT: {{ $}}
68706867
; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
68716868
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF
68726869
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>)
6873-
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
6874-
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32)
6875-
; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
6876-
; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
6870+
; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>)
6871+
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV1]](<2 x s32>)
6872+
; GFX9-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1)
68776873
; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
68786874
; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
6879-
; GFX9-NEXT: G_STORE [[UV8]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
6875+
; GFX9-NEXT: G_STORE [[UV2]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1)
68806876
%0:_(p1) = COPY $vgpr0_vgpr1
68816877
%1:_(<12 x s16>) = G_IMPLICIT_DEF
68826878
G_STORE %1, %0 :: (store (<12 x s16>), align 16, addrspace 1)

0 commit comments

Comments
 (0)