Skip to content

Commit 19c86d9

Browse files
committed
[GlobalISel] Fold G_ICMP if possible
This patch folds `G_ICMP` when both operands are known integer constants (scalar or build-vector of constants).
1 parent 3e3f0c3 commit 19c86d9

File tree

9 files changed

+197
-72
lines changed

llvm/include/llvm/CodeGen/GlobalISel/Utils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,10 @@ std::optional<APFloat> ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
313313
std::optional<SmallVector<unsigned>>
314314
ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI);
315315

316+
std::optional<SmallVector<APInt>>
317+
ConstantFoldICmp(unsigned Pred, const Register Op1, const Register Op2,
318+
const MachineRegisterInfo &MRI);
319+
316320
/// Test if the given value is known to have exactly one bit set. This differs
317321
/// from computeKnownBits in that it doesn't necessarily determine which bit is
318322
/// set.

llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,20 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
174174
switch (Opc) {
175175
default:
176176
break;
177+
case TargetOpcode::G_ICMP: {
178+
assert(SrcOps.size() == 3 && "Invalid sources");
179+
assert(DstOps.size() == 1 && "Invalid dsts");
180+
LLT SrcTy = SrcOps[1].getLLTTy(*getMRI());
181+
182+
if (std::optional<SmallVector<APInt>> Cst =
183+
ConstantFoldICmp(SrcOps[0].getPredicate(), SrcOps[1].getReg(),
184+
SrcOps[2].getReg(), *getMRI())) {
185+
if (SrcTy.isVector())
186+
return buildBuildVectorConstant(DstOps[0], *Cst);
187+
return buildConstant(DstOps[0], Cst->front());
188+
}
189+
break;
190+
}
177191
case TargetOpcode::G_ADD:
178192
case TargetOpcode::G_PTR_ADD:
179193
case TargetOpcode::G_AND:

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3741,9 +3741,11 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
37413741
}
37423742
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
37433743
auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3744-
MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3744+
auto Tmp = MRI.createGenericVirtualRegister(MRI.getType(OldValRes));
3745+
MIRBuilder.buildAtomicCmpXchg(Tmp, Addr, CmpVal, NewVal,
37453746
**MI.memoperands_begin());
3746-
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3747+
MIRBuilder.buildCopy(OldValRes, Tmp);
3748+
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, Tmp, CmpVal);
37473749
MI.eraseFromParent();
37483750
return Legalized;
37493751
}
@@ -7622,10 +7624,14 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
76227624
LLT Ty = Dst0Ty;
76237625
LLT BoolTy = Dst1Ty;
76247626

7627+
auto Tmp = MRI.createGenericVirtualRegister(MRI.getType(Dst0));
7628+
76257629
if (IsAdd)
7626-
MIRBuilder.buildAdd(Dst0, LHS, RHS);
7630+
MIRBuilder.buildAdd(Tmp, LHS, RHS);
76277631
else
7628-
MIRBuilder.buildSub(Dst0, LHS, RHS);
7632+
MIRBuilder.buildSub(Tmp, LHS, RHS);
7633+
7634+
MIRBuilder.buildCopy(Dst0, Tmp);
76297635

76307636
// TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
76317637

@@ -7638,7 +7644,7 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
76387644
// (LHS) if and only if the other operand (RHS) is (non-zero) positive,
76397645
// otherwise there will be overflow.
76407646
auto ResultLowerThanLHS =
7641-
MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
7647+
MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Tmp, LHS);
76427648
auto ConditionRHS = MIRBuilder.buildICmp(
76437649
IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
76447650

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -996,6 +996,74 @@ llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
996996
return std::nullopt;
997997
}
998998

999+
std::optional<SmallVector<APInt>>
1000+
llvm::ConstantFoldICmp(unsigned Pred, const Register Op1, const Register Op2,
1001+
const MachineRegisterInfo &MRI) {
1002+
LLT Ty = MRI.getType(Op1);
1003+
if (Ty != MRI.getType(Op2))
1004+
return std::nullopt;
1005+
1006+
auto TryFoldScalar = [&MRI, Pred](Register LHS,
1007+
Register RHS) -> std::optional<APInt> {
1008+
auto LHSCst = getIConstantVRegVal(LHS, MRI);
1009+
auto RHSCst = getIConstantVRegVal(RHS, MRI);
1010+
if (!LHSCst || !RHSCst)
1011+
return std::nullopt;
1012+
1013+
switch (Pred) {
1014+
case CmpInst::Predicate::ICMP_EQ:
1015+
return APInt(/*numBits=*/1, LHSCst->eq(*RHSCst));
1016+
case CmpInst::Predicate::ICMP_NE:
1017+
return APInt(/*numBits=*/1, LHSCst->ne(*RHSCst));
1018+
case CmpInst::Predicate::ICMP_UGT:
1019+
return APInt(/*numBits=*/1, LHSCst->ugt(*RHSCst));
1020+
case CmpInst::Predicate::ICMP_UGE:
1021+
return APInt(/*numBits=*/1, LHSCst->uge(*RHSCst));
1022+
case CmpInst::Predicate::ICMP_ULT:
1023+
return APInt(/*numBits=*/1, LHSCst->ult(*RHSCst));
1024+
case CmpInst::Predicate::ICMP_ULE:
1025+
return APInt(/*numBits=*/1, LHSCst->ule(*RHSCst));
1026+
case CmpInst::Predicate::ICMP_SGT:
1027+
return APInt(/*numBits=*/1, LHSCst->sgt(*RHSCst));
1028+
case CmpInst::Predicate::ICMP_SGE:
1029+
return APInt(/*numBits=*/1, LHSCst->sge(*RHSCst));
1030+
case CmpInst::Predicate::ICMP_SLT:
1031+
return APInt(/*numBits=*/1, LHSCst->slt(*RHSCst));
1032+
case CmpInst::Predicate::ICMP_SLE:
1033+
return APInt(/*numBits=*/1, LHSCst->sle(*RHSCst));
1034+
default:
1035+
return std::nullopt;
1036+
}
1037+
};
1038+
1039+
SmallVector<APInt> FoldedICmps;
1040+
1041+
if (Ty.isVector()) {
1042+
// Try to constant fold each element.
1043+
auto *BV1 = getOpcodeDef<GBuildVector>(Op1, MRI);
1044+
auto *BV2 = getOpcodeDef<GBuildVector>(Op2, MRI);
1045+
if (!BV1 || !BV2)
1046+
return std::nullopt;
1047+
assert(BV1->getNumSources() == BV2->getNumSources() && "Invalid vectors");
1048+
for (unsigned I = 0; I < BV1->getNumSources(); ++I) {
1049+
if (auto MaybeFold =
1050+
TryFoldScalar(BV1->getSourceReg(I), BV2->getSourceReg(I))) {
1051+
FoldedICmps.emplace_back(*MaybeFold);
1052+
continue;
1053+
}
1054+
return std::nullopt;
1055+
}
1056+
return FoldedICmps;
1057+
}
1058+
1059+
if (auto MaybeCst = TryFoldScalar(Op1, Op2)) {
1060+
FoldedICmps.emplace_back(*MaybeCst);
1061+
return FoldedICmps;
1062+
}
1063+
1064+
return std::nullopt;
1065+
}
1066+
9991067
bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
10001068
GISelKnownBits *KB) {
10011069
std::optional<DefinitionAndSourceRegister> DefSrcReg =

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@ body: |
1515
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
1616
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
1717
; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1)
18+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s32)
1819
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
19-
; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
20+
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s32), implicit [[ICMP]](s1)
2021
%0:_(p1) = COPY $vgpr0_vgpr1
2122
%1:_(s32) = COPY $vgpr2
2223
%2:_(s32) = COPY $vgpr3
@@ -39,8 +40,9 @@ body: |
3940
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3
4041
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
4142
; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32))
43+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s32)
4244
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
43-
; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
45+
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s32), implicit [[ICMP]](s1)
4446
%0:_(p0) = COPY $vgpr0_vgpr1
4547
%1:_(s32) = COPY $vgpr2
4648
%2:_(s32) = COPY $vgpr3
@@ -62,8 +64,9 @@ body: |
6264
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
6365
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
6466
; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 3)
67+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ATOMIC_CMPXCHG]](s32)
6568
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]]
66-
; CHECK-NEXT: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
69+
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s32), implicit [[ICMP]](s1)
6770
%0:_(p3) = COPY $vgpr0
6871
%1:_(s32) = COPY $vgpr1
6972
%2:_(s32) = COPY $vgpr2
@@ -86,8 +89,9 @@ body: |
8689
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5
8790
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY1]](s64)
8891
; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 1)
92+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s64)
8993
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s64), [[COPY1]]
90-
; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
94+
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s64), implicit [[ICMP]](s1)
9195
%0:_(p1) = COPY $vgpr0_vgpr1
9296
%1:_(s64) = COPY $vgpr2_vgpr3
9397
%2:_(s64) = COPY $vgpr4_vgpr5
@@ -109,8 +113,9 @@ body: |
109113
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2
110114
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4
111115
; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 3)
116+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[ATOMIC_CMPXCHG]](s64)
112117
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[COPY1]]
113-
; CHECK-NEXT: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
118+
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s64), implicit [[ICMP]](s1)
114119
%0:_(p3) = COPY $vgpr0
115120
%1:_(s64) = COPY $vgpr1_vgpr2
116121
%2:_(s64) = COPY $vgpr3_vgpr4

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -82,12 +82,13 @@ body: |
8282
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
8383
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
8484
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]]
85+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
8586
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
8687
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[COPY]]
8788
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C]]
8889
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
8990
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
90-
; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)
91+
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
9192
; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
9293
%0:_(s32) = COPY $vgpr0
9394
%1:_(s32) = COPY $vgpr1
@@ -113,12 +114,13 @@ body: |
113114
; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]]
114115
; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]]
115116
; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32)
117+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
116118
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
117119
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]]
118120
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]]
119121
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
120122
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
121-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
123+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](s64)
122124
; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32)
123125
%0:_(s64) = COPY $vgpr0_vgpr1
124126
%1:_(s64) = COPY $vgpr2_vgpr3
@@ -152,6 +154,7 @@ body: |
152154
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
153155
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
154156
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
157+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY [[BITCAST2]](<2 x s16>)
155158
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
156159
; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32)
157160
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16
@@ -164,8 +167,8 @@ body: |
164167
; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
165168
; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16
166169
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
167-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
168-
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[COPY2]]
170+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
171+
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[COPY3]]
169172
; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16
170173
; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG5]](s32), [[C2]]
171174
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]]
@@ -176,7 +179,7 @@ body: |
176179
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
177180
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
178181
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32)
179-
; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
182+
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](<2 x s16>)
180183
; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>)
181184
%0:_(<2 x s16>) = COPY $vgpr0
182185
%1:_(<2 x s16>) = COPY $vgpr1
@@ -318,6 +321,7 @@ body: |
318321
; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
319322
; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
320323
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>)
324+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY [[CONCAT_VECTORS]](<4 x s16>)
321325
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
322326
; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
323327
; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
@@ -342,14 +346,14 @@ body: |
342346
; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32)
343347
; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16
344348
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
345-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
346-
; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[COPY2]]
347-
; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16
348349
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
349-
; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG9]](s32), [[COPY3]]
350-
; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16
350+
; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[COPY3]]
351+
; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16
351352
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
352-
; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG10]](s32), [[COPY4]]
353+
; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG9]](s32), [[COPY4]]
354+
; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16
355+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
356+
; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG10]](s32), [[COPY5]]
353357
; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16
354358
; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG11]](s32), [[C2]]
355359
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]]
@@ -366,7 +370,7 @@ body: |
366370
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
367371
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]]
368372
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32)
369-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
373+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](<4 x s16>)
370374
; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>)
371375
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
372376
%1:_(<4 x s16>) = COPY $vgpr1_vgpr2
@@ -392,6 +396,7 @@ body: |
392396
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]]
393397
; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]]
394398
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32)
399+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[BUILD_VECTOR]](<2 x s32>)
395400
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
396401
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
397402
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[UV4]]
@@ -407,7 +412,7 @@ body: |
407412
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
408413
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
409414
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
410-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
415+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](<2 x s32>)
411416
; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>)
412417
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
413418
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3

0 commit comments

Comments
 (0)