Skip to content

Commit cf65afb

Browse files
committed
[AArch64][GISel] Extend lowering for fp round intrinsics.
This extends the lowering of ceil, floor, nearbyint, rint, round, roundeven and trunc. They are all very similar, so they can reuse the same legalization info. selectIntrinsicTrunc and selectIntrinsicRound can be removed, as the opcodes they handled can be selected via tablegen patterns, and G_INTRINSIC_ROUNDEVEN is marked as the GlobalISel equivalent of froundeven. Otherwise this reuses the existing code, filling it out to handle more types. Differential Revision: https://reviews.llvm.org/D157679
1 parent f2583f3 commit cf65afb

13 files changed

+1302
-1719
lines changed

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ def : GINodeEquiv<G_FRINT, frint>;
144144
def : GINodeEquiv<G_FNEARBYINT, fnearbyint>;
145145
def : GINodeEquiv<G_INTRINSIC_TRUNC, ftrunc>;
146146
def : GINodeEquiv<G_INTRINSIC_ROUND, fround>;
147+
def : GINodeEquiv<G_INTRINSIC_ROUNDEVEN, froundeven>;
147148
def : GINodeEquiv<G_INTRINSIC_LRINT, lrint>;
148149
def : GINodeEquiv<G_FCOPYSIGN, fcopysign>;
149150
def : GINodeEquiv<G_SMIN, smin>;

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4855,6 +4855,13 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
48554855
case TargetOpcode::G_FNEG:
48564856
case TargetOpcode::G_FABS:
48574857
case TargetOpcode::G_FSQRT:
4858+
case TargetOpcode::G_FCEIL:
4859+
case TargetOpcode::G_FFLOOR:
4860+
case TargetOpcode::G_FNEARBYINT:
4861+
case TargetOpcode::G_FRINT:
4862+
case TargetOpcode::G_INTRINSIC_ROUND:
4863+
case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
4864+
case TargetOpcode::G_INTRINSIC_TRUNC:
48584865
case TargetOpcode::G_BSWAP:
48594866
case TargetOpcode::G_FCANONICALIZE:
48604867
case TargetOpcode::G_SEXT_INREG:

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 0 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,6 @@ class AArch64InstructionSelector : public InstructionSelector {
190190
MachineRegisterInfo &MRI);
191191
bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
192192
bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
193-
bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
194-
bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
195193
bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
196194
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
197195
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -3494,10 +3492,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
34943492
return false;
34953493
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
34963494
}
3497-
case TargetOpcode::G_INTRINSIC_TRUNC:
3498-
return selectIntrinsicTrunc(I, MRI);
3499-
case TargetOpcode::G_INTRINSIC_ROUND:
3500-
return selectIntrinsicRound(I, MRI);
35013495
case TargetOpcode::G_BUILD_VECTOR:
35023496
return selectBuildVector(I, MRI);
35033497
case TargetOpcode::G_MERGE_VALUES:
@@ -3696,116 +3690,6 @@ bool AArch64InstructionSelector::selectTLSGlobalValue(
36963690
return true;
36973691
}
36983692

3699-
bool AArch64InstructionSelector::selectIntrinsicTrunc(
3700-
MachineInstr &I, MachineRegisterInfo &MRI) const {
3701-
const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3702-
3703-
// Select the correct opcode.
3704-
unsigned Opc = 0;
3705-
if (!SrcTy.isVector()) {
3706-
switch (SrcTy.getSizeInBits()) {
3707-
default:
3708-
case 16:
3709-
Opc = AArch64::FRINTZHr;
3710-
break;
3711-
case 32:
3712-
Opc = AArch64::FRINTZSr;
3713-
break;
3714-
case 64:
3715-
Opc = AArch64::FRINTZDr;
3716-
break;
3717-
}
3718-
} else {
3719-
unsigned NumElts = SrcTy.getNumElements();
3720-
switch (SrcTy.getElementType().getSizeInBits()) {
3721-
default:
3722-
break;
3723-
case 16:
3724-
if (NumElts == 4)
3725-
Opc = AArch64::FRINTZv4f16;
3726-
else if (NumElts == 8)
3727-
Opc = AArch64::FRINTZv8f16;
3728-
break;
3729-
case 32:
3730-
if (NumElts == 2)
3731-
Opc = AArch64::FRINTZv2f32;
3732-
else if (NumElts == 4)
3733-
Opc = AArch64::FRINTZv4f32;
3734-
break;
3735-
case 64:
3736-
if (NumElts == 2)
3737-
Opc = AArch64::FRINTZv2f64;
3738-
break;
3739-
}
3740-
}
3741-
3742-
if (!Opc) {
3743-
// Didn't get an opcode above, bail.
3744-
LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3745-
return false;
3746-
}
3747-
3748-
// Legalization would have set us up perfectly for this; we just need to
3749-
// set the opcode and move on.
3750-
I.setDesc(TII.get(Opc));
3751-
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3752-
}
3753-
3754-
bool AArch64InstructionSelector::selectIntrinsicRound(
3755-
MachineInstr &I, MachineRegisterInfo &MRI) const {
3756-
const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3757-
3758-
// Select the correct opcode.
3759-
unsigned Opc = 0;
3760-
if (!SrcTy.isVector()) {
3761-
switch (SrcTy.getSizeInBits()) {
3762-
default:
3763-
case 16:
3764-
Opc = AArch64::FRINTAHr;
3765-
break;
3766-
case 32:
3767-
Opc = AArch64::FRINTASr;
3768-
break;
3769-
case 64:
3770-
Opc = AArch64::FRINTADr;
3771-
break;
3772-
}
3773-
} else {
3774-
unsigned NumElts = SrcTy.getNumElements();
3775-
switch (SrcTy.getElementType().getSizeInBits()) {
3776-
default:
3777-
break;
3778-
case 16:
3779-
if (NumElts == 4)
3780-
Opc = AArch64::FRINTAv4f16;
3781-
else if (NumElts == 8)
3782-
Opc = AArch64::FRINTAv8f16;
3783-
break;
3784-
case 32:
3785-
if (NumElts == 2)
3786-
Opc = AArch64::FRINTAv2f32;
3787-
else if (NumElts == 4)
3788-
Opc = AArch64::FRINTAv4f32;
3789-
break;
3790-
case 64:
3791-
if (NumElts == 2)
3792-
Opc = AArch64::FRINTAv2f64;
3793-
break;
3794-
}
3795-
}
3796-
3797-
if (!Opc) {
3798-
// Didn't get an opcode above, bail.
3799-
LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3800-
return false;
3801-
}
3802-
3803-
// Legalization would have set us up perfectly for this; we just need to
3804-
// set the opcode and move on.
3805-
I.setDesc(TII.get(Opc));
3806-
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3807-
}
3808-
38093693
bool AArch64InstructionSelector::selectVectorICmp(
38103694
MachineInstr &I, MachineRegisterInfo &MRI) {
38113695
Register DstReg = I.getOperand(0).getReg();

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -243,9 +243,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
243243

244244
getActionDefinitionsBuilder(G_FREM).libcallFor({s32, s64});
245245

246-
getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR, G_FRINT, G_FMA,
247-
G_INTRINSIC_TRUNC, G_INTRINSIC_ROUND,
248-
G_FNEARBYINT, G_INTRINSIC_LRINT})
246+
getActionDefinitionsBuilder({G_FMA, G_INTRINSIC_LRINT})
249247
// If we don't have full FP16 support, then scalarize the elements of
250248
// vectors containing fp16 types.
251249
.fewerElementsIf(
@@ -936,8 +934,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
936934
// TODO: Vector types.
937935
getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
938936

939-
getActionDefinitionsBuilder(
940-
{G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM, G_FMAXIMUM, G_FMINIMUM})
937+
getActionDefinitionsBuilder({G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM, G_FMAXIMUM,
938+
G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT,
939+
G_FNEARBYINT, G_INTRINSIC_TRUNC,
940+
G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
941941
.legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
942942
.legalIf([=](const LegalityQuery &Query) {
943943
const auto &Ty = Query.Types[0];

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,7 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
431431
case TargetOpcode::G_FRINT:
432432
case TargetOpcode::G_INTRINSIC_TRUNC:
433433
case TargetOpcode::G_INTRINSIC_ROUND:
434+
case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
434435
case TargetOpcode::G_FMAXNUM:
435436
case TargetOpcode::G_FMINNUM:
436437
case TargetOpcode::G_FMAXIMUM:

llvm/test/CodeGen/AArch64/GlobalISel/legalize-ceil.mir

Lines changed: 13 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -25,33 +25,15 @@ body: |
2525
; CHECK: liveins: $q0
2626
; CHECK-NEXT: {{ $}}
2727
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
28-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
29-
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
30-
; CHECK-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT]]
31-
; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL]](s32)
32-
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
33-
; CHECK-NEXT: [[FCEIL1:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT1]]
34-
; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL1]](s32)
35-
; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
36-
; CHECK-NEXT: [[FCEIL2:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT2]]
37-
; CHECK-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL2]](s32)
38-
; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
39-
; CHECK-NEXT: [[FCEIL3:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT3]]
40-
; CHECK-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL3]](s32)
41-
; CHECK-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
42-
; CHECK-NEXT: [[FCEIL4:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT4]]
43-
; CHECK-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL4]](s32)
44-
; CHECK-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
45-
; CHECK-NEXT: [[FCEIL5:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT5]]
46-
; CHECK-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL5]](s32)
47-
; CHECK-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
48-
; CHECK-NEXT: [[FCEIL6:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT6]]
49-
; CHECK-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL6]](s32)
50-
; CHECK-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
51-
; CHECK-NEXT: [[FCEIL7:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT7]]
52-
; CHECK-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL7]](s32)
53-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16)
54-
; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>)
28+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
29+
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
30+
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
31+
; CHECK-NEXT: [[FCEIL:%[0-9]+]]:_(<4 x s32>) = G_FCEIL [[FPEXT]]
32+
; CHECK-NEXT: [[FCEIL1:%[0-9]+]]:_(<4 x s32>) = G_FCEIL [[FPEXT1]]
33+
; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FCEIL]](<4 x s32>)
34+
; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FCEIL1]](<4 x s32>)
35+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>)
36+
; CHECK-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
5537
; CHECK-NEXT: RET_ReallyLR implicit $q0
5638
%0:_(<8 x s16>) = COPY $q0
5739
%1:_(<8 x s16>) = G_FCEIL %0
@@ -73,21 +55,10 @@ body: |
7355
; CHECK: liveins: $d0
7456
; CHECK-NEXT: {{ $}}
7557
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
76-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
77-
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
78-
; CHECK-NEXT: [[FCEIL:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT]]
79-
; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL]](s32)
80-
; CHECK-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
81-
; CHECK-NEXT: [[FCEIL1:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT1]]
82-
; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL1]](s32)
83-
; CHECK-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
84-
; CHECK-NEXT: [[FCEIL2:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT2]]
85-
; CHECK-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL2]](s32)
86-
; CHECK-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
87-
; CHECK-NEXT: [[FCEIL3:%[0-9]+]]:_(s32) = G_FCEIL [[FPEXT3]]
88-
; CHECK-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FCEIL3]](s32)
89-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16)
90-
; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
58+
; CHECK-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
59+
; CHECK-NEXT: [[FCEIL:%[0-9]+]]:_(<4 x s32>) = G_FCEIL [[FPEXT]]
60+
; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FCEIL]](<4 x s32>)
61+
; CHECK-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>)
9162
; CHECK-NEXT: RET_ReallyLR implicit $d0
9263
%0:_(<4 x s16>) = COPY $d0
9364
%1:_(<4 x s16>) = G_FCEIL %0

llvm/test/CodeGen/AArch64/GlobalISel/legalize-frint.mir

Lines changed: 13 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -149,21 +149,10 @@ body: |
149149
; NOFP16: liveins: $d0
150150
; NOFP16-NEXT: {{ $}}
151151
; NOFP16-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
152-
; NOFP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
153-
; NOFP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
154-
; NOFP16-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]]
155-
; NOFP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32)
156-
; NOFP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
157-
; NOFP16-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT1]]
158-
; NOFP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT1]](s32)
159-
; NOFP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
160-
; NOFP16-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT2]]
161-
; NOFP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT2]](s32)
162-
; NOFP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
163-
; NOFP16-NEXT: [[FRINT3:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT3]]
164-
; NOFP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT3]](s32)
165-
; NOFP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16)
166-
; NOFP16-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
152+
; NOFP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[COPY]](<4 x s16>)
153+
; NOFP16-NEXT: [[FRINT:%[0-9]+]]:_(<4 x s32>) = G_FRINT [[FPEXT]]
154+
; NOFP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FRINT]](<4 x s32>)
155+
; NOFP16-NEXT: $d0 = COPY [[FPTRUNC]](<4 x s16>)
167156
; NOFP16-NEXT: RET_ReallyLR implicit $d0
168157
;
169158
; FP16-LABEL: name: test_v4f16.rint
@@ -192,33 +181,15 @@ body: |
192181
; NOFP16: liveins: $q0
193182
; NOFP16-NEXT: {{ $}}
194183
; NOFP16-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
195-
; NOFP16-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
196-
; NOFP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[UV]](s16)
197-
; NOFP16-NEXT: [[FRINT:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT]]
198-
; NOFP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT]](s32)
199-
; NOFP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[UV1]](s16)
200-
; NOFP16-NEXT: [[FRINT1:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT1]]
201-
; NOFP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT1]](s32)
202-
; NOFP16-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[UV2]](s16)
203-
; NOFP16-NEXT: [[FRINT2:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT2]]
204-
; NOFP16-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT2]](s32)
205-
; NOFP16-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[UV3]](s16)
206-
; NOFP16-NEXT: [[FRINT3:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT3]]
207-
; NOFP16-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT3]](s32)
208-
; NOFP16-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[UV4]](s16)
209-
; NOFP16-NEXT: [[FRINT4:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT4]]
210-
; NOFP16-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT4]](s32)
211-
; NOFP16-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[UV5]](s16)
212-
; NOFP16-NEXT: [[FRINT5:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT5]]
213-
; NOFP16-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT5]](s32)
214-
; NOFP16-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[UV6]](s16)
215-
; NOFP16-NEXT: [[FRINT6:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT6]]
216-
; NOFP16-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT6]](s32)
217-
; NOFP16-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[UV7]](s16)
218-
; NOFP16-NEXT: [[FRINT7:%[0-9]+]]:_(s32) = G_FRINT [[FPEXT7]]
219-
; NOFP16-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FRINT7]](s32)
220-
; NOFP16-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[FPTRUNC]](s16), [[FPTRUNC1]](s16), [[FPTRUNC2]](s16), [[FPTRUNC3]](s16), [[FPTRUNC4]](s16), [[FPTRUNC5]](s16), [[FPTRUNC6]](s16), [[FPTRUNC7]](s16)
221-
; NOFP16-NEXT: $q0 = COPY [[BUILD_VECTOR]](<8 x s16>)
184+
; NOFP16-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>)
185+
; NOFP16-NEXT: [[FPEXT:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV]](<4 x s16>)
186+
; NOFP16-NEXT: [[FPEXT1:%[0-9]+]]:_(<4 x s32>) = G_FPEXT [[UV1]](<4 x s16>)
187+
; NOFP16-NEXT: [[FRINT:%[0-9]+]]:_(<4 x s32>) = G_FRINT [[FPEXT]]
188+
; NOFP16-NEXT: [[FRINT1:%[0-9]+]]:_(<4 x s32>) = G_FRINT [[FPEXT1]]
189+
; NOFP16-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FRINT]](<4 x s32>)
190+
; NOFP16-NEXT: [[FPTRUNC1:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[FRINT1]](<4 x s32>)
191+
; NOFP16-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[FPTRUNC]](<4 x s16>), [[FPTRUNC1]](<4 x s16>)
192+
; NOFP16-NEXT: $q0 = COPY [[CONCAT_VECTORS]](<8 x s16>)
222193
; NOFP16-NEXT: RET_ReallyLR implicit $q0
223194
;
224195
; FP16-LABEL: name: test_v8f16.rint

0 commit comments

Comments
 (0)