Skip to content

Commit 9b5971a

Browse files
authored
[AArch64][GlobalISel] Lower G_BUILD_VECTOR to G_INSERT_VECTOR_ELT (#105686)
The lowering happens in post-legalizer lowering if any source registers from G_BUILD_VECTOR are not constants. Add a pattern fragment setting `scalar_to_vector ($src)` as equivalent to `vector_insert (undef), ($src), (i64 0)`.
1 parent ed220e1 commit 9b5971a

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

66 files changed

+3200
-3647
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,15 @@ def build_vector_to_dup : GICombineRule<
222222
(apply [{ applyBuildVectorToDup(*${root}, MRI, B); }])
223223
>;
224224

225-
def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>;
225+
def build_vector_to_vector_insert : GICombineRule<
226+
(defs root:$root, register_matchinfo:$matchinfo),
227+
(match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
228+
[{ return matchLowerBuildToInsertVecElt(*${root}, MRI); }]),
229+
(apply [{ applyLowerBuildToInsertVecElt(*${root}, MRI, B); }])
230+
>;
231+
232+
def build_vector_lowering : GICombineGroup<[build_vector_to_dup,
233+
build_vector_to_vector_insert]>;
226234

227235
def lower_vector_fcmp : GICombineRule<
228236
(defs root:$root),

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 38 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3315,6 +3315,10 @@ defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;
33153315
// Pre-fetch.
33163316
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
33173317

3318+
def vec_ins_or_scal_vec : PatFrags<(ops node:$src),
3319+
[(vector_insert undef, node:$src, (i64 0)),
3320+
(scalar_to_vector node:$src)]>;
3321+
33183322
// For regular load, we do not have any alignment requirement.
33193323
// Thus, it is safe to directly map the vector loads with interesting
33203324
// addressing modes.
@@ -3323,13 +3327,13 @@ multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
33233327
ValueType ScalTy, ValueType VecTy,
33243328
Instruction LOADW, Instruction LOADX,
33253329
SubRegIndex sub> {
3326-
def : Pat<(VecTy (scalar_to_vector (ScalTy
3330+
def : Pat<(VecTy (vec_ins_or_scal_vec (ScalTy
33273331
(loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
33283332
(INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
33293333
(LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
33303334
sub)>;
33313335

3332-
def : Pat<(VecTy (scalar_to_vector (ScalTy
3336+
def : Pat<(VecTy (vec_ins_or_scal_vec (ScalTy
33333337
(loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
33343338
(INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
33353339
(LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
@@ -3357,12 +3361,12 @@ defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>;
33573361
defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>;
33583362

33593363

3360-
def : Pat <(v1i64 (scalar_to_vector (i64
3364+
def : Pat <(v1i64 (vec_ins_or_scal_vec (i64
33613365
(load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
33623366
ro_Wextend64:$extend))))),
33633367
(LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
33643368

3365-
def : Pat <(v1i64 (scalar_to_vector (i64
3369+
def : Pat <(v1i64 (vec_ins_or_scal_vec (i64
33663370
(load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
33673371
ro_Xextend64:$extend))))),
33683372
(LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
@@ -3495,34 +3499,34 @@ def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
34953499
// Thus, it is safe to directly map the vector loads with interesting
34963500
// addressing modes.
34973501
// FIXME: We could do the same for bitconvert to floating point vectors.
3498-
def : Pat <(v8i8 (scalar_to_vector (i32
3502+
def : Pat <(v8i8 (vec_ins_or_scal_vec (i32
34993503
(extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
35003504
(INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
35013505
(LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
3502-
def : Pat <(v16i8 (scalar_to_vector (i32
3506+
def : Pat <(v16i8 (vec_ins_or_scal_vec (i32
35033507
(extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
35043508
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
35053509
(LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
3506-
def : Pat <(v4i16 (scalar_to_vector (i32
3510+
def : Pat <(v4i16 (vec_ins_or_scal_vec (i32
35073511
(extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
35083512
(INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
35093513
(LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
3510-
def : Pat <(v8i16 (scalar_to_vector (i32
3514+
def : Pat <(v8i16 (vec_ins_or_scal_vec (i32
35113515
(extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
35123516
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
35133517
(LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
3514-
def : Pat <(v2i32 (scalar_to_vector (i32
3518+
def : Pat <(v2i32 (vec_ins_or_scal_vec (i32
35153519
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
35163520
(INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
35173521
(LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
3518-
def : Pat <(v4i32 (scalar_to_vector (i32
3522+
def : Pat <(v4i32 (vec_ins_or_scal_vec (i32
35193523
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
35203524
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
35213525
(LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
3522-
def : Pat <(v1i64 (scalar_to_vector (i64
3526+
def : Pat <(v1i64 (vec_ins_or_scal_vec (i64
35233527
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
35243528
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3525-
def : Pat <(v2i64 (scalar_to_vector (i64
3529+
def : Pat <(v2i64 (vec_ins_or_scal_vec (i64
35263530
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
35273531
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
35283532
(LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;
@@ -6848,61 +6852,60 @@ def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
68486852

68496853
defm INS : SIMDIns;
68506854

6851-
def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
6855+
def : Pat<(v16i8 (vec_ins_or_scal_vec GPR32:$Rn)),
68526856
(SUBREG_TO_REG (i32 0),
68536857
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
6854-
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
6858+
def : Pat<(v8i8 (vec_ins_or_scal_vec GPR32:$Rn)),
68556859
(SUBREG_TO_REG (i32 0),
68566860
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
68576861

68586862
// The top bits will be zero from the FMOVWSr
68596863
def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
68606864
(SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;
68616865

6862-
def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
6866+
def : Pat<(v8i16 (vec_ins_or_scal_vec GPR32:$Rn)),
68636867
(SUBREG_TO_REG (i32 0),
68646868
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
6865-
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
6869+
def : Pat<(v4i16 (vec_ins_or_scal_vec GPR32:$Rn)),
68666870
(SUBREG_TO_REG (i32 0),
68676871
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
68686872

6869-
def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
6873+
def : Pat<(v4f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
68706874
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6871-
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
6875+
def : Pat<(v8f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
68726876
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
68736877

6874-
def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6878+
def : Pat<(v4bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
68756879
(INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6876-
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6880+
def : Pat<(v8bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
68776881
(INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
68786882

6879-
def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
6883+
def : Pat<(v2i32 (vec_ins_or_scal_vec (i32 FPR32:$Rn))),
68806884
(v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
68816885
(i32 FPR32:$Rn), ssub))>;
6882-
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
6886+
def : Pat<(v4i32 (vec_ins_or_scal_vec (i32 FPR32:$Rn))),
68836887
(v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
68846888
(i32 FPR32:$Rn), ssub))>;
6885-
6886-
def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
6889+
def : Pat<(v2i64 (vec_ins_or_scal_vec (i64 FPR64:$Rn))),
68876890
(v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
68886891
(i64 FPR64:$Rn), dsub))>;
68896892

6890-
def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
6893+
def : Pat<(v4f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
68916894
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6892-
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
6895+
def : Pat<(v8f16 (vec_ins_or_scal_vec (f16 FPR16:$Rn))),
68936896
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
68946897

6895-
def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6898+
def : Pat<(v4bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
68966899
(INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6897-
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6900+
def : Pat<(v8bf16 (vec_ins_or_scal_vec (bf16 FPR16:$Rn))),
68986901
(INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
68996902

6900-
def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
6903+
def : Pat<(v4f32 (vec_ins_or_scal_vec (f32 FPR32:$Rn))),
69016904
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
6902-
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
6905+
def : Pat<(v2f32 (vec_ins_or_scal_vec (f32 FPR32:$Rn))),
69036906
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
69046907

6905-
def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
6908+
def : Pat<(v2f64 (vec_ins_or_scal_vec (f64 FPR64:$Rn))),
69066909
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
69076910

69086911
def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
@@ -8507,7 +8510,7 @@ def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexH
85078510
let Predicates = [HasNEON] in {
85088511
class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
85098512
SDPatternOperator ExtLoad, Instruction LD1>
8510-
: Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
8513+
: Pat<(ResultTy (vec_ins_or_scal_vec (i32 (ExtLoad GPR64sp:$Rn)))),
85118514
(ResultTy (EXTRACT_SUBREG
85128515
(LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
85138516

@@ -8940,11 +8943,11 @@ def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
89408943
def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
89418944
def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
89428945
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8943-
def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
8946+
def : Pat<(v1i64 (vec_ins_or_scal_vec GPR64:$Xn)),
89448947
(COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8945-
def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
8948+
def : Pat<(v1f64 (vec_ins_or_scal_vec GPR64:$Xn)),
89468949
(COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8947-
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
8950+
def : Pat<(v1f64 (vec_ins_or_scal_vec (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
89488951

89498952
def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
89508953
(COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2116,6 +2116,21 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
21162116
I.getOperand(1).setReg(NewSrc.getReg(0));
21172117
return true;
21182118
}
2119+
case AArch64::G_INSERT_VECTOR_ELT: {
2120+
// Convert the type from p0 to s64 to help selection.
2121+
LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2122+
LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
2123+
if (!SrcVecTy.isPointerVector())
2124+
return false;
2125+
auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
2126+
MRI.setType(I.getOperand(1).getReg(),
2127+
DstTy.changeElementType(LLT::scalar(64)));
2128+
MRI.setType(I.getOperand(0).getReg(),
2129+
DstTy.changeElementType(LLT::scalar(64)));
2130+
MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2131+
I.getOperand(2).setReg(NewSrc.getReg(0));
2132+
return true;
2133+
}
21192134
case TargetOpcode::G_UITOFP:
21202135
case TargetOpcode::G_SITOFP: {
21212136
// If both source and destination regbanks are FPR, then convert the opcode

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1054,6 +1054,40 @@ void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
10541054
MI.eraseFromParent();
10551055
}
10561056

1057+
// Matches G_BUILD_VECTOR where at least one source operand is not a constant
1058+
bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
1059+
auto *GBuildVec = cast<GBuildVector>(&MI);
1060+
1061+
// Check if the values are all constants
1062+
for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1063+
auto ConstVal =
1064+
getAnyConstantVRegValWithLookThrough(GBuildVec->getSourceReg(I), MRI);
1065+
1066+
if (!ConstVal.has_value())
1067+
return true;
1068+
}
1069+
1070+
return false;
1071+
}
1072+
1073+
void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
1074+
MachineIRBuilder &B) {
1075+
auto *GBuildVec = cast<GBuildVector>(&MI);
1076+
LLT DstTy = MRI.getType(GBuildVec->getReg(0));
1077+
Register DstReg = B.buildUndef(DstTy).getReg(0);
1078+
1079+
for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1080+
Register SrcReg = GBuildVec->getSourceReg(I);
1081+
if (mi_match(SrcReg, MRI, m_GImplicitDef()))
1082+
continue;
1083+
auto IdxReg = B.buildConstant(LLT::scalar(64), I);
1084+
DstReg =
1085+
B.buildInsertVectorElement(DstTy, DstReg, SrcReg, IdxReg).getReg(0);
1086+
}
1087+
B.buildCopy(GBuildVec->getReg(0), DstReg);
1088+
GBuildVec->eraseFromParent();
1089+
}
1090+
10571091
bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
10581092
Register &SrcReg) {
10591093
assert(MI.getOpcode() == TargetOpcode::G_STORE);

llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@ define i32 @bar() {
1010
; CHECK: ; %bb.0: ; %bb
1111
; CHECK-NEXT: movi.2d v0, #0000000000000000
1212
; CHECK-NEXT: mov b1, v0[1]
13-
; CHECK-NEXT: mov b2, v0[2]
14-
; CHECK-NEXT: mov b3, v0[3]
15-
; CHECK-NEXT: mov.h v0[1], v1[0]
16-
; CHECK-NEXT: mov.h v2[1], v3[0]
13+
; CHECK-NEXT: mov b2, v0[3]
14+
; CHECK-NEXT: mov b3, v0[2]
15+
; CHECK-NEXT: fmov w8, s1
16+
; CHECK-NEXT: fmov w9, s2
17+
; CHECK-NEXT: mov.h v0[1], w8
18+
; CHECK-NEXT: mov.h v3[1], w9
1719
; CHECK-NEXT: ushll.4s v0, v0, #0
18-
; CHECK-NEXT: ushll.4s v1, v2, #0
20+
; CHECK-NEXT: ushll.4s v1, v3, #0
1921
; CHECK-NEXT: mov.d v0[1], v1[0]
2022
; CHECK-NEXT: movi.4s v1, #1
2123
; CHECK-NEXT: and.16b v0, v0, v1

llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-build-vector-to-dup.mir

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,20 +42,30 @@ body: |
4242
; LOWER-NEXT: {{ $}}
4343
; LOWER-NEXT: %r:_(s32) = COPY $w0
4444
; LOWER-NEXT: %q:_(s32) = COPY $w1
45-
; LOWER-NEXT: %build_vector:_(<2 x s32>) = G_BUILD_VECTOR %r(s32), %q(s32)
45+
; LOWER-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
46+
; LOWER-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
47+
; LOWER-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %r(s32), [[C]](s64)
48+
; LOWER-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
49+
; LOWER-NEXT: [[IVEC1:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], %q(s32), [[C1]](s64)
50+
; LOWER-NEXT: %build_vector:_(<2 x s32>) = COPY [[IVEC1]](<2 x s32>)
4651
; LOWER-NEXT: $d0 = COPY %build_vector(<2 x s32>)
4752
; LOWER-NEXT: RET_ReallyLR implicit $d0
4853
;
4954
; SELECT-LABEL: name: dont_combine_different_reg
5055
; SELECT: liveins: $d0, $w0, $w1
5156
; SELECT-NEXT: {{ $}}
52-
; SELECT-NEXT: %r:gpr32all = COPY $w0
57+
; SELECT-NEXT: %r:gpr32 = COPY $w0
5358
; SELECT-NEXT: %q:gpr32 = COPY $w1
54-
; SELECT-NEXT: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
55-
; SELECT-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], %r, %subreg.ssub
56-
; SELECT-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 1, %q
57-
; SELECT-NEXT: %build_vector:fpr64 = COPY [[INSvi32gpr]].dsub
58-
; SELECT-NEXT: $d0 = COPY %build_vector
59+
; SELECT-NEXT: [[DEF:%[0-9]+]]:fpr64 = IMPLICIT_DEF
60+
; SELECT-NEXT: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
61+
; SELECT-NEXT: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[DEF]], %subreg.dsub
62+
; SELECT-NEXT: [[INSvi32gpr:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG]], 0, %r
63+
; SELECT-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr]].dsub
64+
; SELECT-NEXT: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
65+
; SELECT-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[COPY]], %subreg.dsub
66+
; SELECT-NEXT: [[INSvi32gpr1:%[0-9]+]]:fpr128 = INSvi32gpr [[INSERT_SUBREG1]], 1, %q
67+
; SELECT-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY [[INSvi32gpr1]].dsub
68+
; SELECT-NEXT: $d0 = COPY [[COPY1]]
5969
; SELECT-NEXT: RET_ReallyLR implicit $d0
6070
%r:_(s32) = COPY $w0
6171
%q:_(s32) = COPY $w1

llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,21 @@ body: |
355355
; CHECK: liveins: $w0, $w1, $w2, $w3
356356
; CHECK-NEXT: {{ $}}
357357
; CHECK-NEXT: %lane:_(s32) = COPY $w0
358-
; CHECK-NEXT: %shuf:_(<4 x s32>) = G_DUP %lane(s32)
358+
; CHECK-NEXT: %b:_(s32) = COPY $w1
359+
; CHECK-NEXT: %c:_(s32) = COPY $w2
360+
; CHECK-NEXT: %d:_(s32) = COPY $w3
361+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
362+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
363+
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %lane(s32), [[C]](s64)
364+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
365+
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], %b(s32), [[C1]](s64)
366+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
367+
; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], %c(s32), [[C2]](s64)
368+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
369+
; CHECK-NEXT: [[IVEC3:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[IVEC2]], %d(s32), [[C3]](s64)
370+
; CHECK-NEXT: %buildvec:_(<4 x s32>) = COPY [[IVEC3]](<4 x s32>)
371+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
372+
; CHECK-NEXT: %shuf:_(<4 x s32>) = G_DUPLANE32 %buildvec, [[C4]](s64)
359373
; CHECK-NEXT: $q0 = COPY %shuf(<4 x s32>)
360374
; CHECK-NEXT: RET_ReallyLR implicit $q0
361375
%lane:_(s32) = COPY $w0
@@ -367,7 +381,7 @@ body: |
367381
%shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %buildvec(<4 x s32>), %undef, shufflemask(0, 0, 0, 0)
368382
$q0 = COPY %shuf(<4 x s32>)
369383
RET_ReallyLR implicit $q0
370-
384+
371385
...
372386
---
373387
name: build_vector_rhs
@@ -382,10 +396,35 @@ body: |
382396
;
383397
; CHECK-LABEL: name: build_vector
384398
; CHECK: liveins: $w0, $w1, $w2, $w3, $w4
385-
; CHECK: %lane_1:_(s32) = COPY $w1
386-
; CHECK: %shuf:_(<4 x s32>) = G_DUP %lane_1(s32)
387-
; CHECK: $q0 = COPY %shuf(<4 x s32>)
388-
; CHECK: RET_ReallyLR implicit $q0
399+
; CHECK-NEXT: {{ $}}
400+
; CHECK-NEXT: %lane_0:_(s32) = COPY $w0
401+
; CHECK-NEXT: %lane_1:_(s32) = COPY $w1
402+
; CHECK-NEXT: %b:_(s32) = COPY $w2
403+
; CHECK-NEXT: %c:_(s32) = COPY $w3
404+
; CHECK-NEXT: %d:_(s32) = COPY $w4
405+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
406+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
407+
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %lane_0(s32), [[C]](s64)
408+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
409+
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], %b(s32), [[C1]](s64)
410+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
411+
; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], %c(s32), [[C2]](s64)
412+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
413+
; CHECK-NEXT: [[IVEC3:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[IVEC2]], %d(s32), [[C3]](s64)
414+
; CHECK-NEXT: %buildvec0:_(<4 x s32>) = COPY [[IVEC3]](<4 x s32>)
415+
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
416+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
417+
; CHECK-NEXT: [[IVEC4:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], %lane_1(s32), [[C4]](s64)
418+
; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
419+
; CHECK-NEXT: [[IVEC5:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[IVEC4]], %b(s32), [[C5]](s64)
420+
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
421+
; CHECK-NEXT: [[IVEC6:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[IVEC5]], %c(s32), [[C6]](s64)
422+
; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
423+
; CHECK-NEXT: [[IVEC7:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[IVEC6]], %d(s32), [[C7]](s64)
424+
; CHECK-NEXT: %buildvec1:_(<4 x s32>) = COPY [[IVEC7]](<4 x s32>)
425+
; CHECK-NEXT: %shuf:_(<4 x s32>) = G_SHUFFLE_VECTOR %buildvec0(<4 x s32>), %buildvec1, shufflemask(4, 4, 4, 4)
426+
; CHECK-NEXT: $q0 = COPY %shuf(<4 x s32>)
427+
; CHECK-NEXT: RET_ReallyLR implicit $q0
389428
%lane_0:_(s32) = COPY $w0
390429
%lane_1:_(s32) = COPY $w1
391430
%b:_(s32) = COPY $w2

0 commit comments

Comments
 (0)