Skip to content

Commit f462e85

Browse files
committed
[GlobalISel] Fold G_ICMP if possible
This patch constant-folds `G_ICMP` in the CSE builder when both operands are known constants: scalar compares where each operand is an integer constant, and vector compares where both operands are `G_BUILD_VECTOR`s of integer constants (folded element-wise). Lowered code that feeds a just-built value into a `G_ICMP` is routed through fresh virtual registers and copies so the fold can fire.
1 parent 96c8e2e commit f462e85

File tree

9 files changed

+226
-65
lines changed

9 files changed

+226
-65
lines changed

llvm/include/llvm/CodeGen/GlobalISel/Utils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,10 @@ std::optional<SmallVector<unsigned>>
315315
ConstantFoldCountZeros(Register Src, const MachineRegisterInfo &MRI,
316316
std::function<unsigned(APInt)> CB);
317317

318+
std::optional<SmallVector<APInt>>
319+
ConstantFoldICmp(unsigned Pred, const Register Op1, const Register Op2,
320+
const MachineRegisterInfo &MRI);
321+
318322
/// Test if the given value is known to have exactly one bit set. This differs
319323
/// from computeKnownBits in that it doesn't necessarily determine which bit is
320324
/// set.

llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,20 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
174174
switch (Opc) {
175175
default:
176176
break;
177+
case TargetOpcode::G_ICMP: {
178+
assert(SrcOps.size() == 3 && "Invalid sources");
179+
assert(DstOps.size() == 1 && "Invalid dsts");
180+
LLT SrcTy = SrcOps[1].getLLTTy(*getMRI());
181+
182+
if (std::optional<SmallVector<APInt>> Cst =
183+
ConstantFoldICmp(SrcOps[0].getPredicate(), SrcOps[1].getReg(),
184+
SrcOps[2].getReg(), *getMRI())) {
185+
if (SrcTy.isVector())
186+
return buildBuildVectorConstant(DstOps[0], *Cst);
187+
return buildConstant(DstOps[0], Cst->front());
188+
}
189+
break;
190+
}
177191
case TargetOpcode::G_ADD:
178192
case TargetOpcode::G_PTR_ADD:
179193
case TargetOpcode::G_AND:

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3768,9 +3768,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
37683768
}
37693769
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
37703770
auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3771-
MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3771+
Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
3772+
Register NewSuccessRes = MRI.cloneVirtualRegister(SuccessRes);
3773+
MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
37723774
**MI.memoperands_begin());
3773-
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3775+
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, NewSuccessRes, NewOldValRes, CmpVal);
3776+
MIRBuilder.buildCopy(OldValRes, NewOldValRes);
3777+
MIRBuilder.buildCopy(SuccessRes, NewSuccessRes);
37743778
MI.eraseFromParent();
37753779
return Legalized;
37763780
}
@@ -7657,10 +7661,13 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
76577661
LLT Ty = Dst0Ty;
76587662
LLT BoolTy = Dst1Ty;
76597663

7664+
Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
7665+
Register NewDst1 = MRI.cloneVirtualRegister(Dst1);
7666+
76607667
if (IsAdd)
7661-
MIRBuilder.buildAdd(Dst0, LHS, RHS);
7668+
MIRBuilder.buildAdd(NewDst0, LHS, RHS);
76627669
else
7663-
MIRBuilder.buildSub(Dst0, LHS, RHS);
7670+
MIRBuilder.buildSub(NewDst0, LHS, RHS);
76647671

76657672
// TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
76667673

@@ -7673,12 +7680,16 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
76737680
// (LHS) if and only if the other operand (RHS) is (non-zero) positive,
76747681
// otherwise there will be overflow.
76757682
auto ResultLowerThanLHS =
7676-
MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
7683+
MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
76777684
auto ConditionRHS = MIRBuilder.buildICmp(
76787685
IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
76797686

7680-
MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
7687+
MIRBuilder.buildXor(NewDst1, ConditionRHS, ResultLowerThanLHS);
7688+
7689+
MIRBuilder.buildCopy(Dst0, NewDst0);
7690+
MIRBuilder.buildCopy(Dst1, NewDst1);
76817691
MI.eraseFromParent();
7692+
76827693
return Legalized;
76837694
}
76847695

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,74 @@ llvm::ConstantFoldCountZeros(Register Src, const MachineRegisterInfo &MRI,
997997
return std::nullopt;
998998
}
999999

1000+
std::optional<SmallVector<APInt>>
1001+
llvm::ConstantFoldICmp(unsigned Pred, const Register Op1, const Register Op2,
1002+
const MachineRegisterInfo &MRI) {
1003+
LLT Ty = MRI.getType(Op1);
1004+
if (Ty != MRI.getType(Op2))
1005+
return std::nullopt;
1006+
1007+
auto TryFoldScalar = [&MRI, Pred](Register LHS,
1008+
Register RHS) -> std::optional<APInt> {
1009+
auto LHSCst = getIConstantVRegVal(LHS, MRI);
1010+
auto RHSCst = getIConstantVRegVal(RHS, MRI);
1011+
if (!LHSCst || !RHSCst)
1012+
return std::nullopt;
1013+
1014+
switch (Pred) {
1015+
case CmpInst::Predicate::ICMP_EQ:
1016+
return APInt(/*numBits=*/1, LHSCst->eq(*RHSCst));
1017+
case CmpInst::Predicate::ICMP_NE:
1018+
return APInt(/*numBits=*/1, LHSCst->ne(*RHSCst));
1019+
case CmpInst::Predicate::ICMP_UGT:
1020+
return APInt(/*numBits=*/1, LHSCst->ugt(*RHSCst));
1021+
case CmpInst::Predicate::ICMP_UGE:
1022+
return APInt(/*numBits=*/1, LHSCst->uge(*RHSCst));
1023+
case CmpInst::Predicate::ICMP_ULT:
1024+
return APInt(/*numBits=*/1, LHSCst->ult(*RHSCst));
1025+
case CmpInst::Predicate::ICMP_ULE:
1026+
return APInt(/*numBits=*/1, LHSCst->ule(*RHSCst));
1027+
case CmpInst::Predicate::ICMP_SGT:
1028+
return APInt(/*numBits=*/1, LHSCst->sgt(*RHSCst));
1029+
case CmpInst::Predicate::ICMP_SGE:
1030+
return APInt(/*numBits=*/1, LHSCst->sge(*RHSCst));
1031+
case CmpInst::Predicate::ICMP_SLT:
1032+
return APInt(/*numBits=*/1, LHSCst->slt(*RHSCst));
1033+
case CmpInst::Predicate::ICMP_SLE:
1034+
return APInt(/*numBits=*/1, LHSCst->sle(*RHSCst));
1035+
default:
1036+
return std::nullopt;
1037+
}
1038+
};
1039+
1040+
SmallVector<APInt> FoldedICmps;
1041+
1042+
if (Ty.isVector()) {
1043+
// Try to constant fold each element.
1044+
auto *BV1 = getOpcodeDef<GBuildVector>(Op1, MRI);
1045+
auto *BV2 = getOpcodeDef<GBuildVector>(Op2, MRI);
1046+
if (!BV1 || !BV2)
1047+
return std::nullopt;
1048+
assert(BV1->getNumSources() == BV2->getNumSources() && "Invalid vectors");
1049+
for (unsigned I = 0; I < BV1->getNumSources(); ++I) {
1050+
if (auto MaybeFold =
1051+
TryFoldScalar(BV1->getSourceReg(I), BV2->getSourceReg(I))) {
1052+
FoldedICmps.emplace_back(*MaybeFold);
1053+
continue;
1054+
}
1055+
return std::nullopt;
1056+
}
1057+
return FoldedICmps;
1058+
}
1059+
1060+
if (auto MaybeCst = TryFoldScalar(Op1, Op2)) {
1061+
FoldedICmps.emplace_back(*MaybeCst);
1062+
return FoldedICmps;
1063+
}
1064+
1065+
return std::nullopt;
1066+
}
1067+
10001068
bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
10011069
GISelKnownBits *KB) {
10021070
std::optional<DefinitionAndSourceRegister> DefSrcReg =

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-atomic-cmpxchg-with-success.mir

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@ body: |
1616
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
1717
; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 1)
1818
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
19-
; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
19+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s32)
20+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s1) = COPY [[ICMP]](s1)
21+
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s32), implicit [[COPY4]](s1)
2022
%0:_(p1) = COPY $vgpr0_vgpr1
2123
%1:_(s32) = COPY $vgpr2
2224
%2:_(s32) = COPY $vgpr3
@@ -40,7 +42,9 @@ body: |
4042
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY1]](s32)
4143
; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p0), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32))
4244
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s32), [[COPY1]]
43-
; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
45+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s32)
46+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s1) = COPY [[ICMP]](s1)
47+
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s32), implicit [[COPY4]](s1)
4448
%0:_(p0) = COPY $vgpr0_vgpr1
4549
%1:_(s32) = COPY $vgpr2
4650
%2:_(s32) = COPY $vgpr3
@@ -63,7 +67,9 @@ body: |
6367
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
6468
; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s32) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s32), addrspace 3)
6569
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s32), [[COPY1]]
66-
; CHECK-NEXT: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s32), implicit [[ICMP]](s1)
70+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ATOMIC_CMPXCHG]](s32)
71+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s1) = COPY [[ICMP]](s1)
72+
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s32), implicit [[COPY4]](s1)
6773
%0:_(p3) = COPY $vgpr0
6874
%1:_(s32) = COPY $vgpr1
6975
%2:_(s32) = COPY $vgpr2
@@ -87,7 +93,9 @@ body: |
8793
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY1]](s64)
8894
; CHECK-NEXT: [[AMDGPU_ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_AMDGPU_ATOMIC_CMPXCHG [[COPY]](p1), [[BUILD_VECTOR]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 1)
8995
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AMDGPU_ATOMIC_CMPXCHG]](s64), [[COPY1]]
90-
; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
96+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[AMDGPU_ATOMIC_CMPXCHG]](s64)
97+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s1) = COPY [[ICMP]](s1)
98+
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s64), implicit [[COPY4]](s1)
9199
%0:_(p1) = COPY $vgpr0_vgpr1
92100
%1:_(s64) = COPY $vgpr2_vgpr3
93101
%2:_(s64) = COPY $vgpr4_vgpr5
@@ -110,7 +118,9 @@ body: |
110118
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4
111119
; CHECK-NEXT: [[ATOMIC_CMPXCHG:%[0-9]+]]:_(s64) = G_ATOMIC_CMPXCHG [[COPY]](p3), [[COPY1]], [[COPY2]] :: (load store syncscope("agent-one-as") monotonic monotonic (s64), addrspace 3)
112120
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ATOMIC_CMPXCHG]](s64), [[COPY1]]
113-
; CHECK-NEXT: S_ENDPGM 0, implicit [[ATOMIC_CMPXCHG]](s64), implicit [[ICMP]](s1)
121+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[ATOMIC_CMPXCHG]](s64)
122+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s1) = COPY [[ICMP]](s1)
123+
; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY3]](s64), implicit [[COPY4]](s1)
114124
%0:_(p3) = COPY $vgpr0
115125
%1:_(s64) = COPY $vgpr1_vgpr2
116126
%2:_(s64) = COPY $vgpr3_vgpr4

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,10 @@ body: |
2121
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
2222
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[COPY2]]
2323
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
24+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
2425
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
2526
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]]
26-
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
27+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY3]](s1)
2728
; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32)
2829
; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
2930
%0:_(s32) = COPY $vgpr0
@@ -56,7 +57,8 @@ body: |
5657
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
5758
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[C]]
5859
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
59-
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
60+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
61+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY2]](s1)
6062
; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)
6163
; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
6264
%0:_(s32) = COPY $vgpr0
@@ -86,8 +88,10 @@ body: |
8688
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[COPY]]
8789
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C]]
8890
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
89-
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
90-
; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32)
91+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
92+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
93+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY3]](s1)
94+
; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32)
9195
; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32)
9296
%0:_(s32) = COPY $vgpr0
9397
%1:_(s32) = COPY $vgpr1
@@ -117,8 +121,10 @@ body: |
117121
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]]
118122
; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]]
119123
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
120-
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
121-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64)
124+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[MV]](s64)
125+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s1) = COPY [[XOR]](s1)
126+
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[COPY3]](s1)
127+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](s64)
122128
; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32)
123129
%0:_(s64) = COPY $vgpr0_vgpr1
124130
%1:_(s64) = COPY $vgpr2_vgpr3
@@ -172,11 +178,12 @@ body: |
172178
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]]
173179
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
174180
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
181+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY [[BITCAST2]](<2 x s16>)
175182
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
176183
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
177184
; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
178185
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32)
179-
; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
186+
; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](<2 x s16>)
180187
; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>)
181188
%0:_(<2 x s16>) = COPY $vgpr0
182189
%1:_(<2 x s16>) = COPY $vgpr1
@@ -360,13 +367,14 @@ body: |
360367
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
361368
; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1)
362369
; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1)
370+
; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s16>) = COPY [[CONCAT_VECTORS]](<4 x s16>)
363371
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
364372
; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]]
365373
; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]]
366374
; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]]
367375
; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]]
368376
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32)
369-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>)
377+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY5]](<4 x s16>)
370378
; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>)
371379
%0:_(<4 x s16>) = COPY $vgpr0_vgpr1
372380
%1:_(<4 x s16>) = COPY $vgpr1_vgpr2
@@ -403,11 +411,12 @@ body: |
403411
; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]]
404412
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1)
405413
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1)
414+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[BUILD_VECTOR]](<2 x s32>)
406415
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
407416
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
408417
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
409418
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32)
410-
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
419+
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY2]](<2 x s32>)
411420
; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>)
412421
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
413422
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3

0 commit comments

Comments
 (0)