Skip to content

Commit 02b11c8

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.5
2 parents fc0b67e + 90fbc6f commit 02b11c8

File tree

5 files changed

+129
-165
lines changed

5 files changed

+129
-165
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,10 @@ class LegalizerHelper {
281281
MachineInstr &MI,
282282
LostDebugLocObserver &LocObserver);
283283

284+
/// Build, using \p MIRBuilder, a constant of scalar type \p Ty holding the
/// neutral element of the vector reduction \p Opcode (for example 0 for
/// G_VECREDUCE_ADD and 1 for G_VECREDUCE_MUL).
MachineInstrBuilder
285+
getNeutralElementForVecReduce(unsigned Opcode, MachineIRBuilder &MIRBuilder,
286+
LLT Ty);
287+
284288
public:
285289
/// Return the alignment to use for a stack temporary object with the given
286290
/// type.

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5159,6 +5159,42 @@ LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
51595159
return Legalized;
51605160
}
51615161

5162+
/// Materialize the neutral (identity) element of a vector reduction.
///
/// Emits, through \p MIRBuilder, a constant of scalar type \p Ty whose value
/// leaves the result of the reduction \p Opcode unchanged, so it can be used
/// to fill the extra lanes when the reduced vector is widened.
///
/// \param Opcode     a G_VECREDUCE_* opcode handled by the switch below.
/// \param MIRBuilder builder used to emit the constant.
/// \param Ty         scalar element type of the reduction (asserted).
/// \returns the instruction defining the neutral element.
MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
    unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
  assert(Ty.isScalar() && "Expected scalar type to make neutral element for");

  // Ty is scalar, so its scalar size is its full size; query it once and use
  // it for every bit-pattern constant below.
  const unsigned BitWidth = Ty.getScalarSizeInBits();

  switch (Opcode) {
  default:
    // An undef value is not a neutral element: padding lanes with it could
    // change the reduction result. Reject unknown opcodes instead of silently
    // handing back a value the caller would treat as an identity.
    llvm_unreachable(
        "getNeutralElementForVecReduce called with invalid opcode!");
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_UMAX:
    return MIRBuilder.buildConstant(Ty, 0);
  case TargetOpcode::G_VECREDUCE_MUL:
    return MIRBuilder.buildConstant(Ty, 1);
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_UMIN:
    return MIRBuilder.buildConstant(Ty, APInt::getAllOnes(BitWidth));
  case TargetOpcode::G_VECREDUCE_SMAX:
    return MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BitWidth));
  case TargetOpcode::G_VECREDUCE_SMIN:
    return MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BitWidth));
  case TargetOpcode::G_VECREDUCE_FADD:
    // -0.0 rather than +0.0: x + (-0.0) == x for every x, whereas adding
    // +0.0 would turn a -0.0 input into +0.0.
    return MIRBuilder.buildFConstant(Ty, -0.0);
  case TargetOpcode::G_VECREDUCE_FMUL:
    return MIRBuilder.buildFConstant(Ty, 1.0);
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
    // Not implemented yet; report it with the specific message rather than
    // an assert(false) that compiles out and falls through.
    llvm_unreachable("getNeutralElementForVecReduce unimplemented for "
                     "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
  }
}
5197+
51625198
LegalizerHelper::LegalizeResult
51635199
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
51645200
LLT MoreTy) {
@@ -5341,6 +5377,35 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
53415377
Observer.changedInstr(MI);
53425378
return Legalized;
53435379
}
5380+
case TargetOpcode::G_VECREDUCE_FADD:
5381+
case TargetOpcode::G_VECREDUCE_FMUL:
5382+
case TargetOpcode::G_VECREDUCE_ADD:
5383+
case TargetOpcode::G_VECREDUCE_MUL:
5384+
case TargetOpcode::G_VECREDUCE_AND:
5385+
case TargetOpcode::G_VECREDUCE_OR:
5386+
case TargetOpcode::G_VECREDUCE_XOR:
5387+
case TargetOpcode::G_VECREDUCE_SMAX:
5388+
case TargetOpcode::G_VECREDUCE_SMIN:
5389+
case TargetOpcode::G_VECREDUCE_UMAX:
5390+
case TargetOpcode::G_VECREDUCE_UMIN: {
5391+
LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
5392+
MachineOperand &MO = MI.getOperand(1);
5393+
auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
5394+
auto NeutralElement = getNeutralElementForVecReduce(
5395+
MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
5396+
for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
5397+
i != e; i++) {
5398+
auto Idx = MIRBuilder.buildConstant(LLT::scalar(32), i);
5399+
NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
5400+
NeutralElement, Idx);
5401+
}
5402+
5403+
Observer.changingInstr(MI);
5404+
MO.setReg(NewVec.getReg(0));
5405+
Observer.changedInstr(MI);
5406+
return Legalized;
5407+
}
5408+
53445409
default:
53455410
return UnableToLegalize;
53465411
}

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,6 +1070,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10701070
{s16, v8s16},
10711071
{s32, v2s32},
10721072
{s32, v4s32}})
1073+
.moreElementsIf(
1074+
[=](const LegalityQuery &Query) {
1075+
return Query.Types[1].isVector() &&
1076+
Query.Types[1].getElementType() != s8 &&
1077+
Query.Types[1].getNumElements() & 1;
1078+
},
1079+
LegalizeMutations::moreElementsToNextPow2(1))
10731080
.clampMaxNumElements(1, s64, 2)
10741081
.clampMaxNumElements(1, s32, 4)
10751082
.clampMaxNumElements(1, s16, 8)

llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll

Lines changed: 47 additions & 144 deletions
Original file line numberDiff line numberDiff line change
@@ -595,30 +595,14 @@ entry:
595595
}
596596

597597
define i16 @sminv_v3i16(<3 x i16> %a) {
598-
; CHECK-SD-LABEL: sminv_v3i16:
599-
; CHECK-SD: // %bb.0: // %entry
600-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
601-
; CHECK-SD-NEXT: mov w8, #32767 // =0x7fff
602-
; CHECK-SD-NEXT: mov v0.h[3], w8
603-
; CHECK-SD-NEXT: sminv h0, v0.4h
604-
; CHECK-SD-NEXT: fmov w0, s0
605-
; CHECK-SD-NEXT: ret
606-
;
607-
; CHECK-GI-LABEL: sminv_v3i16:
608-
; CHECK-GI: // %bb.0: // %entry
609-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
610-
; CHECK-GI-NEXT: mov h1, v0.h[1]
611-
; CHECK-GI-NEXT: smov w8, v0.h[0]
612-
; CHECK-GI-NEXT: umov w9, v0.h[0]
613-
; CHECK-GI-NEXT: umov w10, v0.h[1]
614-
; CHECK-GI-NEXT: smov w11, v0.h[2]
615-
; CHECK-GI-NEXT: umov w13, v0.h[2]
616-
; CHECK-GI-NEXT: fmov w12, s1
617-
; CHECK-GI-NEXT: cmp w8, w12, sxth
618-
; CHECK-GI-NEXT: csel w8, w9, w10, lt
619-
; CHECK-GI-NEXT: cmp w11, w8, sxth
620-
; CHECK-GI-NEXT: csel w0, w8, w13, gt
621-
; CHECK-GI-NEXT: ret
598+
; CHECK-LABEL: sminv_v3i16:
599+
; CHECK: // %bb.0: // %entry
600+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
601+
; CHECK-NEXT: mov w8, #32767 // =0x7fff
602+
; CHECK-NEXT: mov v0.h[3], w8
603+
; CHECK-NEXT: sminv h0, v0.4h
604+
; CHECK-NEXT: fmov w0, s0
605+
; CHECK-NEXT: ret
622606
entry:
623607
%arg1 = call i16 @llvm.vector.reduce.smin.v3i16(<3 x i16> %a)
624608
ret i16 %arg1
@@ -670,28 +654,13 @@ entry:
670654
}
671655

672656
define i32 @sminv_v3i32(<3 x i32> %a) {
673-
; CHECK-SD-LABEL: sminv_v3i32:
674-
; CHECK-SD: // %bb.0: // %entry
675-
; CHECK-SD-NEXT: mov w8, #2147483647 // =0x7fffffff
676-
; CHECK-SD-NEXT: mov v0.s[3], w8
677-
; CHECK-SD-NEXT: sminv s0, v0.4s
678-
; CHECK-SD-NEXT: fmov w0, s0
679-
; CHECK-SD-NEXT: ret
680-
;
681-
; CHECK-GI-LABEL: sminv_v3i32:
682-
; CHECK-GI: // %bb.0: // %entry
683-
; CHECK-GI-NEXT: mov s1, v0.s[1]
684-
; CHECK-GI-NEXT: fmov w8, s0
685-
; CHECK-GI-NEXT: mov s2, v0.s[2]
686-
; CHECK-GI-NEXT: fmov w9, s1
687-
; CHECK-GI-NEXT: cmp w8, w9
688-
; CHECK-GI-NEXT: fmov w9, s2
689-
; CHECK-GI-NEXT: fcsel s0, s0, s1, lt
690-
; CHECK-GI-NEXT: fmov w8, s0
691-
; CHECK-GI-NEXT: cmp w8, w9
692-
; CHECK-GI-NEXT: fcsel s0, s0, s2, lt
693-
; CHECK-GI-NEXT: fmov w0, s0
694-
; CHECK-GI-NEXT: ret
657+
; CHECK-LABEL: sminv_v3i32:
658+
; CHECK: // %bb.0: // %entry
659+
; CHECK-NEXT: mov w8, #2147483647 // =0x7fffffff
660+
; CHECK-NEXT: mov v0.s[3], w8
661+
; CHECK-NEXT: sminv s0, v0.4s
662+
; CHECK-NEXT: fmov w0, s0
663+
; CHECK-NEXT: ret
695664
entry:
696665
%arg1 = call i32 @llvm.vector.reduce.smin.v3i32(<3 x i32> %a)
697666
ret i32 %arg1
@@ -972,17 +941,10 @@ define i16 @smaxv_v3i16(<3 x i16> %a) {
972941
; CHECK-GI-LABEL: smaxv_v3i16:
973942
; CHECK-GI: // %bb.0: // %entry
974943
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
975-
; CHECK-GI-NEXT: mov h1, v0.h[1]
976-
; CHECK-GI-NEXT: smov w8, v0.h[0]
977-
; CHECK-GI-NEXT: umov w9, v0.h[0]
978-
; CHECK-GI-NEXT: umov w10, v0.h[1]
979-
; CHECK-GI-NEXT: smov w11, v0.h[2]
980-
; CHECK-GI-NEXT: umov w13, v0.h[2]
981-
; CHECK-GI-NEXT: fmov w12, s1
982-
; CHECK-GI-NEXT: cmp w8, w12, sxth
983-
; CHECK-GI-NEXT: csel w8, w9, w10, gt
984-
; CHECK-GI-NEXT: cmp w11, w8, sxth
985-
; CHECK-GI-NEXT: csel w0, w8, w13, lt
944+
; CHECK-GI-NEXT: mov w8, #32768 // =0x8000
945+
; CHECK-GI-NEXT: mov v0.h[3], w8
946+
; CHECK-GI-NEXT: smaxv h0, v0.4h
947+
; CHECK-GI-NEXT: fmov w0, s0
986948
; CHECK-GI-NEXT: ret
987949
entry:
988950
%arg1 = call i16 @llvm.vector.reduce.smax.v3i16(<3 x i16> %a)
@@ -1035,28 +997,13 @@ entry:
1035997
}
1036998

1037999
define i32 @smaxv_v3i32(<3 x i32> %a) {
1038-
; CHECK-SD-LABEL: smaxv_v3i32:
1039-
; CHECK-SD: // %bb.0: // %entry
1040-
; CHECK-SD-NEXT: mov w8, #-2147483648 // =0x80000000
1041-
; CHECK-SD-NEXT: mov v0.s[3], w8
1042-
; CHECK-SD-NEXT: smaxv s0, v0.4s
1043-
; CHECK-SD-NEXT: fmov w0, s0
1044-
; CHECK-SD-NEXT: ret
1045-
;
1046-
; CHECK-GI-LABEL: smaxv_v3i32:
1047-
; CHECK-GI: // %bb.0: // %entry
1048-
; CHECK-GI-NEXT: mov s1, v0.s[1]
1049-
; CHECK-GI-NEXT: fmov w8, s0
1050-
; CHECK-GI-NEXT: mov s2, v0.s[2]
1051-
; CHECK-GI-NEXT: fmov w9, s1
1052-
; CHECK-GI-NEXT: cmp w8, w9
1053-
; CHECK-GI-NEXT: fmov w9, s2
1054-
; CHECK-GI-NEXT: fcsel s0, s0, s1, gt
1055-
; CHECK-GI-NEXT: fmov w8, s0
1056-
; CHECK-GI-NEXT: cmp w8, w9
1057-
; CHECK-GI-NEXT: fcsel s0, s0, s2, gt
1058-
; CHECK-GI-NEXT: fmov w0, s0
1059-
; CHECK-GI-NEXT: ret
1000+
; CHECK-LABEL: smaxv_v3i32:
1001+
; CHECK: // %bb.0: // %entry
1002+
; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000
1003+
; CHECK-NEXT: mov v0.s[3], w8
1004+
; CHECK-NEXT: smaxv s0, v0.4s
1005+
; CHECK-NEXT: fmov w0, s0
1006+
; CHECK-NEXT: ret
10601007
entry:
10611008
%arg1 = call i32 @llvm.vector.reduce.smax.v3i32(<3 x i32> %a)
10621009
ret i32 %arg1
@@ -1335,17 +1282,10 @@ define i16 @uminv_v3i16(<3 x i16> %a) {
13351282
; CHECK-GI-LABEL: uminv_v3i16:
13361283
; CHECK-GI: // %bb.0: // %entry
13371284
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1338-
; CHECK-GI-NEXT: mov h1, v0.h[1]
1339-
; CHECK-GI-NEXT: umov w8, v0.h[0]
1340-
; CHECK-GI-NEXT: umov w9, v0.h[0]
1341-
; CHECK-GI-NEXT: umov w10, v0.h[1]
1342-
; CHECK-GI-NEXT: umov w11, v0.h[2]
1343-
; CHECK-GI-NEXT: umov w13, v0.h[2]
1344-
; CHECK-GI-NEXT: fmov w12, s1
1345-
; CHECK-GI-NEXT: cmp w8, w12, uxth
1346-
; CHECK-GI-NEXT: csel w8, w9, w10, lo
1347-
; CHECK-GI-NEXT: cmp w11, w8, uxth
1348-
; CHECK-GI-NEXT: csel w0, w8, w13, hi
1285+
; CHECK-GI-NEXT: mov w8, #65535 // =0xffff
1286+
; CHECK-GI-NEXT: mov v0.h[3], w8
1287+
; CHECK-GI-NEXT: uminv h0, v0.4h
1288+
; CHECK-GI-NEXT: fmov w0, s0
13491289
; CHECK-GI-NEXT: ret
13501290
entry:
13511291
%arg1 = call i16 @llvm.vector.reduce.umin.v3i16(<3 x i16> %a)
@@ -1398,28 +1338,13 @@ entry:
13981338
}
13991339

14001340
define i32 @uminv_v3i32(<3 x i32> %a) {
1401-
; CHECK-SD-LABEL: uminv_v3i32:
1402-
; CHECK-SD: // %bb.0: // %entry
1403-
; CHECK-SD-NEXT: mov w8, #-1 // =0xffffffff
1404-
; CHECK-SD-NEXT: mov v0.s[3], w8
1405-
; CHECK-SD-NEXT: uminv s0, v0.4s
1406-
; CHECK-SD-NEXT: fmov w0, s0
1407-
; CHECK-SD-NEXT: ret
1408-
;
1409-
; CHECK-GI-LABEL: uminv_v3i32:
1410-
; CHECK-GI: // %bb.0: // %entry
1411-
; CHECK-GI-NEXT: mov s1, v0.s[1]
1412-
; CHECK-GI-NEXT: fmov w8, s0
1413-
; CHECK-GI-NEXT: mov s2, v0.s[2]
1414-
; CHECK-GI-NEXT: fmov w9, s1
1415-
; CHECK-GI-NEXT: cmp w8, w9
1416-
; CHECK-GI-NEXT: fmov w9, s2
1417-
; CHECK-GI-NEXT: fcsel s0, s0, s1, lo
1418-
; CHECK-GI-NEXT: fmov w8, s0
1419-
; CHECK-GI-NEXT: cmp w8, w9
1420-
; CHECK-GI-NEXT: fcsel s0, s0, s2, lo
1421-
; CHECK-GI-NEXT: fmov w0, s0
1422-
; CHECK-GI-NEXT: ret
1341+
; CHECK-LABEL: uminv_v3i32:
1342+
; CHECK: // %bb.0: // %entry
1343+
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
1344+
; CHECK-NEXT: mov v0.s[3], w8
1345+
; CHECK-NEXT: uminv s0, v0.4s
1346+
; CHECK-NEXT: fmov w0, s0
1347+
; CHECK-NEXT: ret
14231348
entry:
14241349
%arg1 = call i32 @llvm.vector.reduce.umin.v3i32(<3 x i32> %a)
14251350
ret i32 %arg1
@@ -1697,17 +1622,10 @@ define i16 @umaxv_v3i16(<3 x i16> %a) {
16971622
; CHECK-GI-LABEL: umaxv_v3i16:
16981623
; CHECK-GI: // %bb.0: // %entry
16991624
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1700-
; CHECK-GI-NEXT: mov h1, v0.h[1]
1701-
; CHECK-GI-NEXT: umov w8, v0.h[0]
1702-
; CHECK-GI-NEXT: umov w9, v0.h[0]
1703-
; CHECK-GI-NEXT: umov w10, v0.h[1]
1704-
; CHECK-GI-NEXT: umov w11, v0.h[2]
1705-
; CHECK-GI-NEXT: umov w13, v0.h[2]
1706-
; CHECK-GI-NEXT: fmov w12, s1
1707-
; CHECK-GI-NEXT: cmp w8, w12, uxth
1708-
; CHECK-GI-NEXT: csel w8, w9, w10, hi
1709-
; CHECK-GI-NEXT: cmp w11, w8, uxth
1710-
; CHECK-GI-NEXT: csel w0, w8, w13, lo
1625+
; CHECK-GI-NEXT: mov w8, #0 // =0x0
1626+
; CHECK-GI-NEXT: mov v0.h[3], w8
1627+
; CHECK-GI-NEXT: umaxv h0, v0.4h
1628+
; CHECK-GI-NEXT: fmov w0, s0
17111629
; CHECK-GI-NEXT: ret
17121630
entry:
17131631
%arg1 = call i16 @llvm.vector.reduce.umax.v3i16(<3 x i16> %a)
@@ -1760,27 +1678,12 @@ entry:
17601678
}
17611679

17621680
define i32 @umaxv_v3i32(<3 x i32> %a) {
1763-
; CHECK-SD-LABEL: umaxv_v3i32:
1764-
; CHECK-SD: // %bb.0: // %entry
1765-
; CHECK-SD-NEXT: mov v0.s[3], wzr
1766-
; CHECK-SD-NEXT: umaxv s0, v0.4s
1767-
; CHECK-SD-NEXT: fmov w0, s0
1768-
; CHECK-SD-NEXT: ret
1769-
;
1770-
; CHECK-GI-LABEL: umaxv_v3i32:
1771-
; CHECK-GI: // %bb.0: // %entry
1772-
; CHECK-GI-NEXT: mov s1, v0.s[1]
1773-
; CHECK-GI-NEXT: fmov w8, s0
1774-
; CHECK-GI-NEXT: mov s2, v0.s[2]
1775-
; CHECK-GI-NEXT: fmov w9, s1
1776-
; CHECK-GI-NEXT: cmp w8, w9
1777-
; CHECK-GI-NEXT: fmov w9, s2
1778-
; CHECK-GI-NEXT: fcsel s0, s0, s1, hi
1779-
; CHECK-GI-NEXT: fmov w8, s0
1780-
; CHECK-GI-NEXT: cmp w8, w9
1781-
; CHECK-GI-NEXT: fcsel s0, s0, s2, hi
1782-
; CHECK-GI-NEXT: fmov w0, s0
1783-
; CHECK-GI-NEXT: ret
1681+
; CHECK-LABEL: umaxv_v3i32:
1682+
; CHECK: // %bb.0: // %entry
1683+
; CHECK-NEXT: mov v0.s[3], wzr
1684+
; CHECK-NEXT: umaxv s0, v0.4s
1685+
; CHECK-NEXT: fmov w0, s0
1686+
; CHECK-NEXT: ret
17841687
entry:
17851688
%arg1 = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
17861689
ret i32 %arg1

llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -200,27 +200,12 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
200200
}
201201

202202
define i32 @test_v3i32(<3 x i32> %a) nounwind {
203-
; CHECK-SD-LABEL: test_v3i32:
204-
; CHECK-SD: // %bb.0:
205-
; CHECK-SD-NEXT: mov v0.s[3], wzr
206-
; CHECK-SD-NEXT: umaxv s0, v0.4s
207-
; CHECK-SD-NEXT: fmov w0, s0
208-
; CHECK-SD-NEXT: ret
209-
;
210-
; CHECK-GI-LABEL: test_v3i32:
211-
; CHECK-GI: // %bb.0:
212-
; CHECK-GI-NEXT: mov s1, v0.s[1]
213-
; CHECK-GI-NEXT: fmov w8, s0
214-
; CHECK-GI-NEXT: mov s2, v0.s[2]
215-
; CHECK-GI-NEXT: fmov w9, s1
216-
; CHECK-GI-NEXT: cmp w8, w9
217-
; CHECK-GI-NEXT: fmov w9, s2
218-
; CHECK-GI-NEXT: fcsel s0, s0, s1, hi
219-
; CHECK-GI-NEXT: fmov w8, s0
220-
; CHECK-GI-NEXT: cmp w8, w9
221-
; CHECK-GI-NEXT: fcsel s0, s0, s2, hi
222-
; CHECK-GI-NEXT: fmov w0, s0
223-
; CHECK-GI-NEXT: ret
203+
; CHECK-LABEL: test_v3i32:
204+
; CHECK: // %bb.0:
205+
; CHECK-NEXT: mov v0.s[3], wzr
206+
; CHECK-NEXT: umaxv s0, v0.4s
207+
; CHECK-NEXT: fmov w0, s0
208+
; CHECK-NEXT: ret
224209
%b = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
225210
ret i32 %b
226211
}

0 commit comments

Comments
 (0)