Skip to content

Commit b0dc7b5

Browse files
authored
[AArch64][GlobalISel] Prefer to use Vector Truncate (#105692)
Tries to combine scalarised truncates into vector truncate operations. EXAMPLE: `%a(i32), %b(i32) = G_UNMERGE_VALUES %src(<2 x i32>)` `%T_a(i16) = G_TRUNC %a(i32)` `%T_b(i16) = G_TRUNC %b(i32)` `%Imp(i16) = G_IMPLICIT_DEF(i16)` `%dst(v4i16) = G_BUILD_VECTOR %T_a(i16), %T_b(i16), %Imp(i16), %Imp(i16)` ===> `%Imp(<2 x i32>) = G_IMPLICIT_DEF(<2 x i32>)` `%Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x i32>), %Imp(<2 x i32>)` `%dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)`
1 parent f4eeae1 commit b0dc7b5

File tree

15 files changed

+178
-238
lines changed

15 files changed

+178
-238
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,9 @@ class CombinerHelper {
600600
bool matchRotateOutOfRange(MachineInstr &MI);
601601
void applyRotateOutOfRange(MachineInstr &MI);
602602

603+
/// Match a G_BUILD_VECTOR \p MI whose leading sources are G_TRUNCs fed by a
/// single G_UNMERGE_VALUES and whose remaining sources are G_IMPLICIT_DEFs.
/// \p MatchInfo receives the source register of that G_UNMERGE_VALUES.
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo);
604+
/// Replace the G_BUILD_VECTOR \p MI with a vector G_TRUNC of \p MatchInfo,
/// padding \p MatchInfo with undef through G_CONCAT_VECTORS first when the
/// destination has more elements than \p MatchInfo.
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo);
605+
603606
/// \returns true if a G_ICMP instruction \p MI can be replaced with a true
604607
/// or false constant based off of KnownBits information.
605608
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo);

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1505,6 +1505,13 @@ def insert_vector_elt_oob : GICombineRule<
15051505
[{ return Helper.matchInsertVectorElementOOB(*${root}, ${matchinfo}); }]),
15061506
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
15071507

1508+
// Replace a G_BUILD_VECTOR of scalar G_TRUNCs (padded with undef) by a single
// vector G_TRUNC. The match and the rewrite are both delegated to
// CombinerHelper; the matched register is passed between them via $matchinfo.
// Example shape:
// Combine v8i8 (buildvector i8 (trunc(unmerge)), i8 (trunc), i8 (trunc), i8 (trunc), undef, undef, undef, undef)
1509+
def combine_use_vector_truncate : GICombineRule<
1510+
(defs root:$root, register_matchinfo:$matchinfo),
1511+
(match (G_BUILD_VECTOR $dst, GIVariadic<>:$unused):$root,
1512+
[{ return Helper.matchUseVectorTruncate(*${root}, ${matchinfo}); }]),
1513+
(apply [{ Helper.applyUseVectorTruncate(*${root}, ${matchinfo}); }])>;
1514+
15081515
def add_of_vscale : GICombineRule<
15091516
(defs root:$root, build_fn_matchinfo:$matchinfo),
15101517
(match (G_VSCALE $left, $imm1),
@@ -1912,7 +1919,8 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
19121919
sub_add_reg, select_to_minmax,
19131920
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
19141921
combine_concat_vector, match_addos,
1915-
sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat]>;
1922+
sext_trunc, zext_trunc, prefer_sign_combines, combine_shuffle_concat,
1923+
combine_use_vector_truncate]>;
19161924

19171925
// A combine group used to for prelegalizer combiners at -O0. The combines in
19181926
// this group have been selected based on experiments to balance code size and

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3320,6 +3320,112 @@ static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
33203320
isConstTrueVal(TLI, Cst, IsVector, IsFP);
33213321
}
33223322

3323+
// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3324+
// using vector truncates instead
3325+
//
3326+
// EXAMPLE:
3327+
// %a(i32), %b(i32) = G_UNMERGE_VALUES %src(<2 x i32>)
3328+
// %T_a(i16) = G_TRUNC %a(i32)
3329+
// %T_b(i16) = G_TRUNC %b(i32)
3330+
// %Undef(i16) = G_IMPLICIT_DEF(i16)
3331+
// %dst(v4i16) = G_BUILD_VECTORS %T_a(i16), %T_b(i16), %Undef(i16), %Undef(i16)
3332+
//
3333+
// ===>
3334+
// %Undef(<2 x i32>) = G_IMPLICIT_DEF(<2 x i32>)
3335+
// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x i32>), %Undef(<2 x i32>)
3336+
// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3337+
//
3338+
// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3339+
bool CombinerHelper::matchUseVectorTruncate(MachineInstr &MI,
3340+
Register &MatchInfo) {
3341+
auto BuildMI = cast<GBuildVector>(&MI);
3342+
unsigned NumOperands = BuildMI->getNumSources();
3343+
LLT DstTy = MRI.getType(BuildMI->getReg(0));
3344+
3345+
// Check the G_BUILD_VECTOR sources
3346+
unsigned I;
3347+
MachineInstr *UnmergeMI = nullptr;
3348+
3349+
// Check all source TRUNCs come from the same UNMERGE instruction
3350+
for (I = 0; I < NumOperands; ++I) {
3351+
auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3352+
auto SrcMIOpc = SrcMI->getOpcode();
3353+
3354+
// Check if the G_TRUNC instructions all come from the same MI
3355+
if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3356+
if (!UnmergeMI) {
3357+
UnmergeMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3358+
if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3359+
return false;
3360+
} else {
3361+
auto UnmergeSrcMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3362+
if (UnmergeMI != UnmergeSrcMI)
3363+
return false;
3364+
}
3365+
} else {
3366+
break;
3367+
}
3368+
}
3369+
if (I < 2)
3370+
return false;
3371+
3372+
// Check the remaining source elements are only G_IMPLICIT_DEF
3373+
for (; I < NumOperands; ++I) {
3374+
auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3375+
auto SrcMIOpc = SrcMI->getOpcode();
3376+
3377+
if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3378+
return false;
3379+
}
3380+
3381+
// Check the size of unmerge source
3382+
MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3383+
LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3384+
if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3385+
return false;
3386+
3387+
// Only generate legal instructions post-legalizer
3388+
if (!IsPreLegalize) {
3389+
LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3390+
3391+
if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3392+
!isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3393+
return false;
3394+
3395+
if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3396+
return false;
3397+
}
3398+
3399+
return true;
3400+
}
3401+
3402+
// Rewrites the G_BUILD_VECTOR \p MI as a G_TRUNC of the vector held in
// \p MatchInfo. When the destination has more elements than \p MatchInfo, the
// vector is first widened with undef pieces through G_CONCAT_VECTORS.
void CombinerHelper::applyUseVectorTruncate(MachineInstr &MI,
                                            Register &MatchInfo) {
  auto *BuildVec = cast<GBuildVector>(&MI);
  Register Dst = BuildVec->getReg(0);
  LLT DstTy = MRI.getType(Dst);
  LLT SrcVecTy = MRI.getType(MatchInfo);

  // Number of source-sized pieces needed to cover the destination.
  unsigned NumPieces = DstTy.getNumElements() / SrcVecTy.getNumElements();

  // With a single piece the source can be truncated directly.
  Register WideReg = MatchInfo;
  if (NumPieces != 1) {
    Register PadReg = Builder.buildUndef(SrcVecTy).getReg(0);

    // The real data goes first, every remaining piece is undef.
    SmallVector<Register> Pieces;
    Pieces.push_back(MatchInfo);
    for (unsigned Piece = 1; Piece < NumPieces; ++Piece)
      Pieces.push_back(PadReg);

    LLT WideTy = DstTy.changeElementType(SrcVecTy.getScalarType());
    WideReg = Builder.buildConcatVectors(WideTy, Pieces).getReg(0);
  }

  Builder.buildTrunc(Dst, WideReg);
  MI.eraseFromParent();
}
3428+
33233429
bool CombinerHelper::matchNotCmp(MachineInstr &MI,
33243430
SmallVectorImpl<Register> &RegsToNegate) {
33253431
assert(MI.getOpcode() == TargetOpcode::G_XOR);

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,5 +330,5 @@ def AArch64PostLegalizerCombiner
330330
select_to_minmax, or_to_bsp, combine_concat_vector,
331331
commute_constant_to_rhs,
332332
push_freeze_to_prevent_poison_from_propagating,
333-
combine_mul_cmlt]> {
333+
combine_mul_cmlt, combine_use_vector_truncate]> {
334334
}

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
9595
getActionDefinitionsBuilder(
9696
{G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
9797
.legalFor({p0, s8, s16, s32, s64})
98-
.legalFor(PackedVectorAllTypeList)
98+
.legalFor({v16s8, v8s16, v4s32, v2s64, v2p0, v8s8, v4s16, v2s32, v4s8,
99+
v2s16, v2s8})
99100
.widenScalarToNextPow2(0)
100101
.clampScalar(0, s8, s64)
101102
.moreElementsToNextPow2(0)

llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -159,25 +159,13 @@ body: |
159159
; CHECK-LABEL: name: test_freeze_v3s8
160160
; CHECK: liveins: $q0
161161
; CHECK-NEXT: {{ $}}
162-
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
163-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
164-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
165-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
166-
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
167-
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
168-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
169-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>)
170-
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s16>), [[UV5:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
171-
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[UV4]]
172-
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>)
162+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
163+
; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s8>) = G_FREEZE [[DEF]]
164+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[FREEZE]](<4 x s8>)
173165
; CHECK-NEXT: %undef:_(s32) = G_IMPLICIT_DEF
174-
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s16)
175-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
176-
; CHECK-NEXT: %ext0:_(s32) = G_AND [[ANYEXT1]], [[C]]
177-
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s16)
178-
; CHECK-NEXT: %ext1:_(s32) = G_AND [[ANYEXT2]], [[C]]
179-
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s16)
180-
; CHECK-NEXT: %ext2:_(s32) = G_AND [[ANYEXT3]], [[C]]
166+
; CHECK-NEXT: %ext0:_(s32) = G_ZEXT [[UV]](s8)
167+
; CHECK-NEXT: %ext1:_(s32) = G_ZEXT [[UV1]](s8)
168+
; CHECK-NEXT: %ext2:_(s32) = G_ZEXT [[UV2]](s8)
181169
; CHECK-NEXT: %res:_(<4 x s32>) = G_BUILD_VECTOR %ext0(s32), %ext1(s32), %ext2(s32), %undef(s32)
182170
; CHECK-NEXT: $q0 = COPY %res(<4 x s32>)
183171
%x:_(<3 x s8>) = G_IMPLICIT_DEF

llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -248,13 +248,10 @@ body: |
248248
; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
249249
; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
250250
; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
251-
; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
252-
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>)
253-
; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s16)
254-
; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s16)
255-
; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s16)
256-
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
257-
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
251+
; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
252+
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF2]](<4 x s8>)
253+
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
254+
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[UV6]](s8), [[UV7]](s8), [[UV8]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
258255
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<16 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 16, 16, 16, 1, 16, 16, 16, 2, 16, 16, 16, undef, undef, undef, undef)
259256
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[SHUF]](<16 x s8>)
260257
; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<4 x s32>) = G_UITOFP [[BITCAST]](<4 x s32>)

llvm/test/CodeGen/AArch64/bswap.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -177,9 +177,7 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %a){
177177
;
178178
; CHECK-GI-LABEL: bswap_v2i16:
179179
; CHECK-GI: // %bb.0: // %entry
180-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
181-
; CHECK-GI-NEXT: mov w8, v0.s[1]
182-
; CHECK-GI-NEXT: mov v0.h[1], w8
180+
; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
183181
; CHECK-GI-NEXT: rev16 v0.8b, v0.8b
184182
; CHECK-GI-NEXT: mov h1, v0.h[1]
185183
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]

llvm/test/CodeGen/AArch64/concat-vector.ll

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -183,15 +183,12 @@ define <8 x i16> @concat_v8s16_v2s16(ptr %ptr) {
183183
;
184184
; CHECK-GI-LABEL: concat_v8s16_v2s16:
185185
; CHECK-GI: // %bb.0:
186-
; CHECK-GI-NEXT: ldr h1, [x0]
187-
; CHECK-GI-NEXT: ldr h2, [x0, #2]
188-
; CHECK-GI-NEXT: dup v0.4s, w8
189-
; CHECK-GI-NEXT: mov v1.s[1], v2.s[0]
190-
; CHECK-GI-NEXT: xtn v2.4h, v0.4s
191-
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
192-
; CHECK-GI-NEXT: fmov w8, s1
186+
; CHECK-GI-NEXT: ldr h0, [x0]
187+
; CHECK-GI-NEXT: ldr h1, [x0, #2]
188+
; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
189+
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
190+
; CHECK-GI-NEXT: fmov w8, s0
193191
; CHECK-GI-NEXT: mov v0.s[0], w8
194-
; CHECK-GI-NEXT: fmov w8, s2
195192
; CHECK-GI-NEXT: mov v0.s[1], w8
196193
; CHECK-GI-NEXT: mov v0.s[2], w8
197194
; CHECK-GI-NEXT: mov v0.s[3], w8
@@ -209,10 +206,7 @@ define <16 x i8> @concat_v16s8_v4s8(ptr %ptr) {
209206
;
210207
; CHECK-GI-LABEL: concat_v16s8_v4s8:
211208
; CHECK-GI: // %bb.0:
212-
; CHECK-GI-NEXT: dup v0.8h, w8
213-
; CHECK-GI-NEXT: xtn v1.8b, v0.8h
214209
; CHECK-GI-NEXT: ldr s0, [x0]
215-
; CHECK-GI-NEXT: fmov w8, s1
216210
; CHECK-GI-NEXT: mov v0.s[1], w8
217211
; CHECK-GI-NEXT: mov v0.s[2], w8
218212
; CHECK-GI-NEXT: mov v0.s[3], w8

llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,10 @@
33
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

55
define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
6-
; CHECK-SD-LABEL: interleave2_v4f16:
7-
; CHECK-SD: // %bb.0:
8-
; CHECK-SD-NEXT: zip1 v0.4h, v0.4h, v1.4h
9-
; CHECK-SD-NEXT: ret
10-
;
11-
; CHECK-GI-LABEL: interleave2_v4f16:
12-
; CHECK-GI: // %bb.0:
13-
; CHECK-GI-NEXT: dup v2.4s, w8
14-
; CHECK-GI-NEXT: fmov w8, s0
15-
; CHECK-GI-NEXT: fmov w9, s1
16-
; CHECK-GI-NEXT: xtn v0.4h, v2.4s
17-
; CHECK-GI-NEXT: mov v1.s[0], w8
18-
; CHECK-GI-NEXT: mov v2.s[0], w9
19-
; CHECK-GI-NEXT: fmov w8, s0
20-
; CHECK-GI-NEXT: mov v1.s[1], w8
21-
; CHECK-GI-NEXT: mov v2.s[1], w8
22-
; CHECK-GI-NEXT: zip1 v0.4h, v1.4h, v2.4h
23-
; CHECK-GI-NEXT: ret
6+
; CHECK-LABEL: interleave2_v4f16:
7+
; CHECK: // %bb.0:
8+
; CHECK-NEXT: zip1 v0.4h, v0.4h, v1.4h
9+
; CHECK-NEXT: ret
2410
%retval = call <4 x half> @llvm.vector.interleave2.v4f16(<2 x half> %vec0, <2 x half> %vec1)
2511
ret <4 x half> %retval
2612
}

llvm/test/CodeGen/AArch64/fptoi.ll

Lines changed: 12 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3172,42 +3172,22 @@ entry:
31723172
}
31733173

31743174
define <3 x i16> @fptos_v3f32_v3i16(<3 x float> %a) {
3175-
; CHECK-SD-LABEL: fptos_v3f32_v3i16:
3176-
; CHECK-SD: // %bb.0: // %entry
3177-
; CHECK-SD-NEXT: fcvtzs v0.4s, v0.4s
3178-
; CHECK-SD-NEXT: xtn v0.4h, v0.4s
3179-
; CHECK-SD-NEXT: ret
3180-
;
3181-
; CHECK-GI-LABEL: fptos_v3f32_v3i16:
3182-
; CHECK-GI: // %bb.0: // %entry
3183-
; CHECK-GI-NEXT: fcvtzs v0.4s, v0.4s
3184-
; CHECK-GI-NEXT: mov w8, v0.s[1]
3185-
; CHECK-GI-NEXT: mov w9, v0.s[2]
3186-
; CHECK-GI-NEXT: mov v0.h[1], w8
3187-
; CHECK-GI-NEXT: mov v0.h[2], w9
3188-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
3189-
; CHECK-GI-NEXT: ret
3175+
; CHECK-LABEL: fptos_v3f32_v3i16:
3176+
; CHECK: // %bb.0: // %entry
3177+
; CHECK-NEXT: fcvtzs v0.4s, v0.4s
3178+
; CHECK-NEXT: xtn v0.4h, v0.4s
3179+
; CHECK-NEXT: ret
31903180
entry:
31913181
%c = fptosi <3 x float> %a to <3 x i16>
31923182
ret <3 x i16> %c
31933183
}
31943184

31953185
define <3 x i16> @fptou_v3f32_v3i16(<3 x float> %a) {
3196-
; CHECK-SD-LABEL: fptou_v3f32_v3i16:
3197-
; CHECK-SD: // %bb.0: // %entry
3198-
; CHECK-SD-NEXT: fcvtzu v0.4s, v0.4s
3199-
; CHECK-SD-NEXT: xtn v0.4h, v0.4s
3200-
; CHECK-SD-NEXT: ret
3201-
;
3202-
; CHECK-GI-LABEL: fptou_v3f32_v3i16:
3203-
; CHECK-GI: // %bb.0: // %entry
3204-
; CHECK-GI-NEXT: fcvtzu v0.4s, v0.4s
3205-
; CHECK-GI-NEXT: mov w8, v0.s[1]
3206-
; CHECK-GI-NEXT: mov w9, v0.s[2]
3207-
; CHECK-GI-NEXT: mov v0.h[1], w8
3208-
; CHECK-GI-NEXT: mov v0.h[2], w9
3209-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
3210-
; CHECK-GI-NEXT: ret
3186+
; CHECK-LABEL: fptou_v3f32_v3i16:
3187+
; CHECK: // %bb.0: // %entry
3188+
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
3189+
; CHECK-NEXT: xtn v0.4h, v0.4s
3190+
; CHECK-NEXT: ret
32113191
entry:
32123192
%c = fptoui <3 x float> %a to <3 x i16>
32133193
ret <3 x i16> %c
@@ -6077,11 +6057,7 @@ define <3 x i16> @fptos_v3f16_v3i16(<3 x half> %a) {
60776057
; CHECK-GI-NOFP16: // %bb.0: // %entry
60786058
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
60796059
; CHECK-GI-NOFP16-NEXT: fcvtzs v0.4s, v0.4s
6080-
; CHECK-GI-NOFP16-NEXT: mov w8, v0.s[1]
6081-
; CHECK-GI-NOFP16-NEXT: mov w9, v0.s[2]
6082-
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], w8
6083-
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w9
6084-
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
6060+
; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s
60856061
; CHECK-GI-NOFP16-NEXT: ret
60866062
;
60876063
; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i16:
@@ -6110,11 +6086,7 @@ define <3 x i16> @fptou_v3f16_v3i16(<3 x half> %a) {
61106086
; CHECK-GI-NOFP16: // %bb.0: // %entry
61116087
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
61126088
; CHECK-GI-NOFP16-NEXT: fcvtzu v0.4s, v0.4s
6113-
; CHECK-GI-NOFP16-NEXT: mov w8, v0.s[1]
6114-
; CHECK-GI-NOFP16-NEXT: mov w9, v0.s[2]
6115-
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], w8
6116-
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w9
6117-
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 killed $q0
6089+
; CHECK-GI-NOFP16-NEXT: xtn v0.4h, v0.4s
61186090
; CHECK-GI-NOFP16-NEXT: ret
61196091
;
61206092
; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i16:

llvm/test/CodeGen/AArch64/itofp.ll

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7450,9 +7450,7 @@ define <2 x half> @stofp_v2i16_v2f16(<2 x i16> %a) {
74507450
;
74517451
; CHECK-GI-FP16-LABEL: stofp_v2i16_v2f16:
74527452
; CHECK-GI-FP16: // %bb.0: // %entry
7453-
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
7454-
; CHECK-GI-FP16-NEXT: mov w8, v0.s[1]
7455-
; CHECK-GI-FP16-NEXT: mov v0.h[1], w8
7453+
; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h
74567454
; CHECK-GI-FP16-NEXT: scvtf v0.4h, v0.4h
74577455
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
74587456
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
@@ -7493,9 +7491,7 @@ define <2 x half> @utofp_v2i16_v2f16(<2 x i16> %a) {
74937491
;
74947492
; CHECK-GI-FP16-LABEL: utofp_v2i16_v2f16:
74957493
; CHECK-GI-FP16: // %bb.0: // %entry
7496-
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
7497-
; CHECK-GI-FP16-NEXT: mov w8, v0.s[1]
7498-
; CHECK-GI-FP16-NEXT: mov v0.h[1], w8
7494+
; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h
74997495
; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h
75007496
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
75017497
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
@@ -8059,8 +8055,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
80598055
; CHECK-GI-FP16-NEXT: movi d1, #0x0000ff000000ff
80608056
; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
80618057
; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b
8062-
; CHECK-GI-FP16-NEXT: mov w8, v0.s[1]
8063-
; CHECK-GI-FP16-NEXT: mov v0.h[1], w8
8058+
; CHECK-GI-FP16-NEXT: uzp1 v0.4h, v0.4h, v0.4h
80648059
; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h
80658060
; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
80668061
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]

0 commit comments

Comments
 (0)