Skip to content

Commit 2c9b6c1

Browse files
authored
[AArch64][GlobalISel] Improve codegen for G_VECREDUCE_{SMIN,SMAX,UMIN,UMAX} for odd-sized vectors (#82740)
i8 vectors do not have their sizes changed as I noticed regressions in some tests when that was done. This patch also adds support for most G_VECREDUCE_* operations to moreElementsVector in LegalizerHelper.cpp. The code for getting the "neutral" element is taken almost exactly as it is in SelectionDAG, with the exception that support for G_VECREDUCE_{FMAXIMUM,FMINIMUM} was not added. The code for SelectionDAG is located at SelectionDAG::getNeutralElement().
1 parent d128448 commit 2c9b6c1

File tree

5 files changed

+132
-165
lines changed

5 files changed

+132
-165
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,10 @@ class LegalizerHelper {
281281
MachineInstr &MI,
282282
LostDebugLocObserver &LocObserver);
283283

284+
MachineInstrBuilder
285+
getNeutralElementForVecReduce(unsigned Opcode, MachineIRBuilder &MIRBuilder,
286+
LLT Ty);
287+
284288
public:
285289
/// Return the alignment to use for a stack temporary object with the given
286290
/// type.

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5216,6 +5216,43 @@ LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
52165216
return Legalized;
52175217
}
52185218

5219+
/// Build the constant that serves as the neutral element for the vector
/// reduction \p Opcode, i.e. the value used to fill additional lanes so that
/// they do not affect the reduction's result.
///
/// \param Opcode     A G_VECREDUCE_* opcode; see the switch for the supported
///                   set.
/// \param MIRBuilder Builder used to emit the constant.
/// \param Ty         Type of the constant to build; asserted to be scalar.
/// \returns the builder of the emitted integer or floating-point constant.
///
/// G_VECREDUCE_FMINIMUM / G_VECREDUCE_FMAXIMUM are not implemented and any
/// other opcode is invalid; both are reported through llvm_unreachable.
MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
    unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
  assert(Ty.isScalar() && "Expected scalar type to make neutral element for");

  // Every integer constant below is sized from the same width query so the
  // APInt always matches the element width of Ty.
  const unsigned BitWidth = Ty.getScalarSizeInBits();

  switch (Opcode) {
  default:
    llvm_unreachable(
        "getNeutralElementForVecReduce called with invalid opcode!");
  case TargetOpcode::G_VECREDUCE_ADD:
  case TargetOpcode::G_VECREDUCE_OR:
  case TargetOpcode::G_VECREDUCE_XOR:
  case TargetOpcode::G_VECREDUCE_UMAX:
    // x + 0, x | 0, x ^ 0 and umax(x, 0) all yield x.
    return MIRBuilder.buildConstant(Ty, 0);
  case TargetOpcode::G_VECREDUCE_MUL:
    return MIRBuilder.buildConstant(Ty, 1);
  case TargetOpcode::G_VECREDUCE_AND:
  case TargetOpcode::G_VECREDUCE_UMIN:
    // All-ones is both the AND identity and the largest unsigned value.
    return MIRBuilder.buildConstant(Ty, APInt::getAllOnes(BitWidth));
  case TargetOpcode::G_VECREDUCE_SMAX:
    return MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BitWidth));
  case TargetOpcode::G_VECREDUCE_SMIN:
    return MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BitWidth));
  case TargetOpcode::G_VECREDUCE_FADD:
    // -0.0 rather than +0.0: adding -0.0 preserves the sign of a zero sum.
    return MIRBuilder.buildFConstant(Ty, -0.0);
  case TargetOpcode::G_VECREDUCE_FMUL:
    return MIRBuilder.buildFConstant(Ty, 1.0);
  case TargetOpcode::G_VECREDUCE_FMINIMUM:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
    // llvm_unreachable instead of assert(false): the diagnostic stays attached
    // to this case and control can never fall out of the switch in builds
    // where asserts are compiled out.
    llvm_unreachable("getNeutralElementForVecReduce unimplemented for "
                     "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
  }
}
5255+
52195256
LegalizerHelper::LegalizeResult
52205257
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
52215258
LLT MoreTy) {
@@ -5420,6 +5457,37 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
54205457
Observer.changedInstr(MI);
54215458
return Legalized;
54225459
}
5460+
case TargetOpcode::G_VECREDUCE_FADD:
5461+
case TargetOpcode::G_VECREDUCE_FMUL:
5462+
case TargetOpcode::G_VECREDUCE_ADD:
5463+
case TargetOpcode::G_VECREDUCE_MUL:
5464+
case TargetOpcode::G_VECREDUCE_AND:
5465+
case TargetOpcode::G_VECREDUCE_OR:
5466+
case TargetOpcode::G_VECREDUCE_XOR:
5467+
case TargetOpcode::G_VECREDUCE_SMAX:
5468+
case TargetOpcode::G_VECREDUCE_SMIN:
5469+
case TargetOpcode::G_VECREDUCE_UMAX:
5470+
case TargetOpcode::G_VECREDUCE_UMIN: {
5471+
LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
5472+
MachineOperand &MO = MI.getOperand(1);
5473+
auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
5474+
auto NeutralElement = getNeutralElementForVecReduce(
5475+
MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
5476+
5477+
LLT IdxTy(TLI.getVectorIdxTy(MIRBuilder.getDataLayout()));
5478+
for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
5479+
i != e; i++) {
5480+
auto Idx = MIRBuilder.buildConstant(IdxTy, i);
5481+
NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
5482+
NeutralElement, Idx);
5483+
}
5484+
5485+
Observer.changingInstr(MI);
5486+
MO.setReg(NewVec.getReg(0));
5487+
Observer.changedInstr(MI);
5488+
return Legalized;
5489+
}
5490+
54235491
default:
54245492
return UnableToLegalize;
54255493
}

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,6 +1074,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10741074
{s16, v8s16},
10751075
{s32, v2s32},
10761076
{s32, v4s32}})
1077+
.moreElementsIf(
1078+
[=](const LegalityQuery &Query) {
1079+
return Query.Types[1].isVector() &&
1080+
Query.Types[1].getElementType() != s8 &&
1081+
Query.Types[1].getNumElements() & 1;
1082+
},
1083+
LegalizeMutations::moreElementsToNextPow2(1))
10771084
.clampMaxNumElements(1, s64, 2)
10781085
.clampMaxNumElements(1, s32, 4)
10791086
.clampMaxNumElements(1, s16, 8)

llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll

Lines changed: 47 additions & 144 deletions
Original file line numberDiff line numberDiff line change
@@ -595,30 +595,14 @@ entry:
595595
}
596596

597597
define i16 @sminv_v3i16(<3 x i16> %a) {
598-
; CHECK-SD-LABEL: sminv_v3i16:
599-
; CHECK-SD: // %bb.0: // %entry
600-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
601-
; CHECK-SD-NEXT: mov w8, #32767 // =0x7fff
602-
; CHECK-SD-NEXT: mov v0.h[3], w8
603-
; CHECK-SD-NEXT: sminv h0, v0.4h
604-
; CHECK-SD-NEXT: fmov w0, s0
605-
; CHECK-SD-NEXT: ret
606-
;
607-
; CHECK-GI-LABEL: sminv_v3i16:
608-
; CHECK-GI: // %bb.0: // %entry
609-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
610-
; CHECK-GI-NEXT: mov h1, v0.h[1]
611-
; CHECK-GI-NEXT: smov w8, v0.h[0]
612-
; CHECK-GI-NEXT: umov w9, v0.h[0]
613-
; CHECK-GI-NEXT: umov w10, v0.h[1]
614-
; CHECK-GI-NEXT: smov w11, v0.h[2]
615-
; CHECK-GI-NEXT: umov w13, v0.h[2]
616-
; CHECK-GI-NEXT: fmov w12, s1
617-
; CHECK-GI-NEXT: cmp w8, w12, sxth
618-
; CHECK-GI-NEXT: csel w8, w9, w10, lt
619-
; CHECK-GI-NEXT: cmp w11, w8, sxth
620-
; CHECK-GI-NEXT: csel w0, w8, w13, gt
621-
; CHECK-GI-NEXT: ret
598+
; CHECK-LABEL: sminv_v3i16:
599+
; CHECK: // %bb.0: // %entry
600+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
601+
; CHECK-NEXT: mov w8, #32767 // =0x7fff
602+
; CHECK-NEXT: mov v0.h[3], w8
603+
; CHECK-NEXT: sminv h0, v0.4h
604+
; CHECK-NEXT: fmov w0, s0
605+
; CHECK-NEXT: ret
622606
entry:
623607
%arg1 = call i16 @llvm.vector.reduce.smin.v3i16(<3 x i16> %a)
624608
ret i16 %arg1
@@ -670,28 +654,13 @@ entry:
670654
}
671655

672656
define i32 @sminv_v3i32(<3 x i32> %a) {
673-
; CHECK-SD-LABEL: sminv_v3i32:
674-
; CHECK-SD: // %bb.0: // %entry
675-
; CHECK-SD-NEXT: mov w8, #2147483647 // =0x7fffffff
676-
; CHECK-SD-NEXT: mov v0.s[3], w8
677-
; CHECK-SD-NEXT: sminv s0, v0.4s
678-
; CHECK-SD-NEXT: fmov w0, s0
679-
; CHECK-SD-NEXT: ret
680-
;
681-
; CHECK-GI-LABEL: sminv_v3i32:
682-
; CHECK-GI: // %bb.0: // %entry
683-
; CHECK-GI-NEXT: mov s1, v0.s[1]
684-
; CHECK-GI-NEXT: fmov w8, s0
685-
; CHECK-GI-NEXT: mov s2, v0.s[2]
686-
; CHECK-GI-NEXT: fmov w9, s1
687-
; CHECK-GI-NEXT: cmp w8, w9
688-
; CHECK-GI-NEXT: fmov w9, s2
689-
; CHECK-GI-NEXT: fcsel s0, s0, s1, lt
690-
; CHECK-GI-NEXT: fmov w8, s0
691-
; CHECK-GI-NEXT: cmp w8, w9
692-
; CHECK-GI-NEXT: fcsel s0, s0, s2, lt
693-
; CHECK-GI-NEXT: fmov w0, s0
694-
; CHECK-GI-NEXT: ret
657+
; CHECK-LABEL: sminv_v3i32:
658+
; CHECK: // %bb.0: // %entry
659+
; CHECK-NEXT: mov w8, #2147483647 // =0x7fffffff
660+
; CHECK-NEXT: mov v0.s[3], w8
661+
; CHECK-NEXT: sminv s0, v0.4s
662+
; CHECK-NEXT: fmov w0, s0
663+
; CHECK-NEXT: ret
695664
entry:
696665
%arg1 = call i32 @llvm.vector.reduce.smin.v3i32(<3 x i32> %a)
697666
ret i32 %arg1
@@ -972,17 +941,10 @@ define i16 @smaxv_v3i16(<3 x i16> %a) {
972941
; CHECK-GI-LABEL: smaxv_v3i16:
973942
; CHECK-GI: // %bb.0: // %entry
974943
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
975-
; CHECK-GI-NEXT: mov h1, v0.h[1]
976-
; CHECK-GI-NEXT: smov w8, v0.h[0]
977-
; CHECK-GI-NEXT: umov w9, v0.h[0]
978-
; CHECK-GI-NEXT: umov w10, v0.h[1]
979-
; CHECK-GI-NEXT: smov w11, v0.h[2]
980-
; CHECK-GI-NEXT: umov w13, v0.h[2]
981-
; CHECK-GI-NEXT: fmov w12, s1
982-
; CHECK-GI-NEXT: cmp w8, w12, sxth
983-
; CHECK-GI-NEXT: csel w8, w9, w10, gt
984-
; CHECK-GI-NEXT: cmp w11, w8, sxth
985-
; CHECK-GI-NEXT: csel w0, w8, w13, lt
944+
; CHECK-GI-NEXT: mov w8, #32768 // =0x8000
945+
; CHECK-GI-NEXT: mov v0.h[3], w8
946+
; CHECK-GI-NEXT: smaxv h0, v0.4h
947+
; CHECK-GI-NEXT: fmov w0, s0
986948
; CHECK-GI-NEXT: ret
987949
entry:
988950
%arg1 = call i16 @llvm.vector.reduce.smax.v3i16(<3 x i16> %a)
@@ -1035,28 +997,13 @@ entry:
1035997
}
1036998

1037999
define i32 @smaxv_v3i32(<3 x i32> %a) {
1038-
; CHECK-SD-LABEL: smaxv_v3i32:
1039-
; CHECK-SD: // %bb.0: // %entry
1040-
; CHECK-SD-NEXT: mov w8, #-2147483648 // =0x80000000
1041-
; CHECK-SD-NEXT: mov v0.s[3], w8
1042-
; CHECK-SD-NEXT: smaxv s0, v0.4s
1043-
; CHECK-SD-NEXT: fmov w0, s0
1044-
; CHECK-SD-NEXT: ret
1045-
;
1046-
; CHECK-GI-LABEL: smaxv_v3i32:
1047-
; CHECK-GI: // %bb.0: // %entry
1048-
; CHECK-GI-NEXT: mov s1, v0.s[1]
1049-
; CHECK-GI-NEXT: fmov w8, s0
1050-
; CHECK-GI-NEXT: mov s2, v0.s[2]
1051-
; CHECK-GI-NEXT: fmov w9, s1
1052-
; CHECK-GI-NEXT: cmp w8, w9
1053-
; CHECK-GI-NEXT: fmov w9, s2
1054-
; CHECK-GI-NEXT: fcsel s0, s0, s1, gt
1055-
; CHECK-GI-NEXT: fmov w8, s0
1056-
; CHECK-GI-NEXT: cmp w8, w9
1057-
; CHECK-GI-NEXT: fcsel s0, s0, s2, gt
1058-
; CHECK-GI-NEXT: fmov w0, s0
1059-
; CHECK-GI-NEXT: ret
1000+
; CHECK-LABEL: smaxv_v3i32:
1001+
; CHECK: // %bb.0: // %entry
1002+
; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000
1003+
; CHECK-NEXT: mov v0.s[3], w8
1004+
; CHECK-NEXT: smaxv s0, v0.4s
1005+
; CHECK-NEXT: fmov w0, s0
1006+
; CHECK-NEXT: ret
10601007
entry:
10611008
%arg1 = call i32 @llvm.vector.reduce.smax.v3i32(<3 x i32> %a)
10621009
ret i32 %arg1
@@ -1335,17 +1282,10 @@ define i16 @uminv_v3i16(<3 x i16> %a) {
13351282
; CHECK-GI-LABEL: uminv_v3i16:
13361283
; CHECK-GI: // %bb.0: // %entry
13371284
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1338-
; CHECK-GI-NEXT: mov h1, v0.h[1]
1339-
; CHECK-GI-NEXT: umov w8, v0.h[0]
1340-
; CHECK-GI-NEXT: umov w9, v0.h[0]
1341-
; CHECK-GI-NEXT: umov w10, v0.h[1]
1342-
; CHECK-GI-NEXT: umov w11, v0.h[2]
1343-
; CHECK-GI-NEXT: umov w13, v0.h[2]
1344-
; CHECK-GI-NEXT: fmov w12, s1
1345-
; CHECK-GI-NEXT: cmp w8, w12, uxth
1346-
; CHECK-GI-NEXT: csel w8, w9, w10, lo
1347-
; CHECK-GI-NEXT: cmp w11, w8, uxth
1348-
; CHECK-GI-NEXT: csel w0, w8, w13, hi
1285+
; CHECK-GI-NEXT: mov w8, #65535 // =0xffff
1286+
; CHECK-GI-NEXT: mov v0.h[3], w8
1287+
; CHECK-GI-NEXT: uminv h0, v0.4h
1288+
; CHECK-GI-NEXT: fmov w0, s0
13491289
; CHECK-GI-NEXT: ret
13501290
entry:
13511291
%arg1 = call i16 @llvm.vector.reduce.umin.v3i16(<3 x i16> %a)
@@ -1398,28 +1338,13 @@ entry:
13981338
}
13991339

14001340
define i32 @uminv_v3i32(<3 x i32> %a) {
1401-
; CHECK-SD-LABEL: uminv_v3i32:
1402-
; CHECK-SD: // %bb.0: // %entry
1403-
; CHECK-SD-NEXT: mov w8, #-1 // =0xffffffff
1404-
; CHECK-SD-NEXT: mov v0.s[3], w8
1405-
; CHECK-SD-NEXT: uminv s0, v0.4s
1406-
; CHECK-SD-NEXT: fmov w0, s0
1407-
; CHECK-SD-NEXT: ret
1408-
;
1409-
; CHECK-GI-LABEL: uminv_v3i32:
1410-
; CHECK-GI: // %bb.0: // %entry
1411-
; CHECK-GI-NEXT: mov s1, v0.s[1]
1412-
; CHECK-GI-NEXT: fmov w8, s0
1413-
; CHECK-GI-NEXT: mov s2, v0.s[2]
1414-
; CHECK-GI-NEXT: fmov w9, s1
1415-
; CHECK-GI-NEXT: cmp w8, w9
1416-
; CHECK-GI-NEXT: fmov w9, s2
1417-
; CHECK-GI-NEXT: fcsel s0, s0, s1, lo
1418-
; CHECK-GI-NEXT: fmov w8, s0
1419-
; CHECK-GI-NEXT: cmp w8, w9
1420-
; CHECK-GI-NEXT: fcsel s0, s0, s2, lo
1421-
; CHECK-GI-NEXT: fmov w0, s0
1422-
; CHECK-GI-NEXT: ret
1341+
; CHECK-LABEL: uminv_v3i32:
1342+
; CHECK: // %bb.0: // %entry
1343+
; CHECK-NEXT: mov w8, #-1 // =0xffffffff
1344+
; CHECK-NEXT: mov v0.s[3], w8
1345+
; CHECK-NEXT: uminv s0, v0.4s
1346+
; CHECK-NEXT: fmov w0, s0
1347+
; CHECK-NEXT: ret
14231348
entry:
14241349
%arg1 = call i32 @llvm.vector.reduce.umin.v3i32(<3 x i32> %a)
14251350
ret i32 %arg1
@@ -1697,17 +1622,10 @@ define i16 @umaxv_v3i16(<3 x i16> %a) {
16971622
; CHECK-GI-LABEL: umaxv_v3i16:
16981623
; CHECK-GI: // %bb.0: // %entry
16991624
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1700-
; CHECK-GI-NEXT: mov h1, v0.h[1]
1701-
; CHECK-GI-NEXT: umov w8, v0.h[0]
1702-
; CHECK-GI-NEXT: umov w9, v0.h[0]
1703-
; CHECK-GI-NEXT: umov w10, v0.h[1]
1704-
; CHECK-GI-NEXT: umov w11, v0.h[2]
1705-
; CHECK-GI-NEXT: umov w13, v0.h[2]
1706-
; CHECK-GI-NEXT: fmov w12, s1
1707-
; CHECK-GI-NEXT: cmp w8, w12, uxth
1708-
; CHECK-GI-NEXT: csel w8, w9, w10, hi
1709-
; CHECK-GI-NEXT: cmp w11, w8, uxth
1710-
; CHECK-GI-NEXT: csel w0, w8, w13, lo
1625+
; CHECK-GI-NEXT: mov w8, #0 // =0x0
1626+
; CHECK-GI-NEXT: mov v0.h[3], w8
1627+
; CHECK-GI-NEXT: umaxv h0, v0.4h
1628+
; CHECK-GI-NEXT: fmov w0, s0
17111629
; CHECK-GI-NEXT: ret
17121630
entry:
17131631
%arg1 = call i16 @llvm.vector.reduce.umax.v3i16(<3 x i16> %a)
@@ -1760,27 +1678,12 @@ entry:
17601678
}
17611679

17621680
define i32 @umaxv_v3i32(<3 x i32> %a) {
1763-
; CHECK-SD-LABEL: umaxv_v3i32:
1764-
; CHECK-SD: // %bb.0: // %entry
1765-
; CHECK-SD-NEXT: mov v0.s[3], wzr
1766-
; CHECK-SD-NEXT: umaxv s0, v0.4s
1767-
; CHECK-SD-NEXT: fmov w0, s0
1768-
; CHECK-SD-NEXT: ret
1769-
;
1770-
; CHECK-GI-LABEL: umaxv_v3i32:
1771-
; CHECK-GI: // %bb.0: // %entry
1772-
; CHECK-GI-NEXT: mov s1, v0.s[1]
1773-
; CHECK-GI-NEXT: fmov w8, s0
1774-
; CHECK-GI-NEXT: mov s2, v0.s[2]
1775-
; CHECK-GI-NEXT: fmov w9, s1
1776-
; CHECK-GI-NEXT: cmp w8, w9
1777-
; CHECK-GI-NEXT: fmov w9, s2
1778-
; CHECK-GI-NEXT: fcsel s0, s0, s1, hi
1779-
; CHECK-GI-NEXT: fmov w8, s0
1780-
; CHECK-GI-NEXT: cmp w8, w9
1781-
; CHECK-GI-NEXT: fcsel s0, s0, s2, hi
1782-
; CHECK-GI-NEXT: fmov w0, s0
1783-
; CHECK-GI-NEXT: ret
1681+
; CHECK-LABEL: umaxv_v3i32:
1682+
; CHECK: // %bb.0: // %entry
1683+
; CHECK-NEXT: mov v0.s[3], wzr
1684+
; CHECK-NEXT: umaxv s0, v0.4s
1685+
; CHECK-NEXT: fmov w0, s0
1686+
; CHECK-NEXT: ret
17841687
entry:
17851688
%arg1 = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
17861689
ret i32 %arg1

llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -187,27 +187,12 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
187187
}
188188

189189
define i32 @test_v3i32(<3 x i32> %a) nounwind {
190-
; CHECK-SD-LABEL: test_v3i32:
191-
; CHECK-SD: // %bb.0:
192-
; CHECK-SD-NEXT: mov v0.s[3], wzr
193-
; CHECK-SD-NEXT: umaxv s0, v0.4s
194-
; CHECK-SD-NEXT: fmov w0, s0
195-
; CHECK-SD-NEXT: ret
196-
;
197-
; CHECK-GI-LABEL: test_v3i32:
198-
; CHECK-GI: // %bb.0:
199-
; CHECK-GI-NEXT: mov s1, v0.s[1]
200-
; CHECK-GI-NEXT: fmov w8, s0
201-
; CHECK-GI-NEXT: mov s2, v0.s[2]
202-
; CHECK-GI-NEXT: fmov w9, s1
203-
; CHECK-GI-NEXT: cmp w8, w9
204-
; CHECK-GI-NEXT: fmov w9, s2
205-
; CHECK-GI-NEXT: fcsel s0, s0, s1, hi
206-
; CHECK-GI-NEXT: fmov w8, s0
207-
; CHECK-GI-NEXT: cmp w8, w9
208-
; CHECK-GI-NEXT: fcsel s0, s0, s2, hi
209-
; CHECK-GI-NEXT: fmov w0, s0
210-
; CHECK-GI-NEXT: ret
190+
; CHECK-LABEL: test_v3i32:
191+
; CHECK: // %bb.0:
192+
; CHECK-NEXT: mov v0.s[3], wzr
193+
; CHECK-NEXT: umaxv s0, v0.4s
194+
; CHECK-NEXT: fmov w0, s0
195+
; CHECK-NEXT: ret
211196
%b = call i32 @llvm.vector.reduce.umax.v3i32(<3 x i32> %a)
212197
ret i32 %b
213198
}

0 commit comments

Comments
 (0)