Skip to content

Commit e409204

Browse files
authored
VectorCombine: teach foldExtractedCmps about samesign (#122883)
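Follow up on 4a0d53a (PatternMatch: migrate to CmpPredicate) and resolve one of the FIXMEs that commit introduced: replace the exact predicate comparison in foldExtractedCmps with CmpPredicate::getMatching, so that compares whose predicates match only via samesign are folded as well.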
1 parent e87f94a commit e409204

File tree

2 files changed

+43
-4
lines changed

2 files changed

+43
-4
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1097,10 +1097,12 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
10971097
Instruction *I0, *I1;
10981098
Constant *C0, *C1;
10991099
CmpPredicate P0, P1;
1100-
// FIXME: Use CmpPredicate::getMatching here.
11011100
if (!match(B0, m_Cmp(P0, m_Instruction(I0), m_Constant(C0))) ||
1102-
!match(B1, m_Cmp(P1, m_Instruction(I1), m_Constant(C1))) ||
1103-
P0 != static_cast<CmpInst::Predicate>(P1))
1101+
!match(B1, m_Cmp(P1, m_Instruction(I1), m_Constant(C1))))
1102+
return false;
1103+
1104+
auto MatchingPred = CmpPredicate::getMatching(P0, P1);
1105+
if (!MatchingPred)
11041106
return false;
11051107

11061108
// The compare operands must be extracts of the same vector with constant
@@ -1121,7 +1123,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
11211123

11221124
// The original scalar pattern is:
11231125
// binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
1124-
CmpInst::Predicate Pred = P0;
1126+
CmpInst::Predicate Pred = *MatchingPred;
11251127
unsigned CmpOpcode =
11261128
CmpInst::isFPPredicate(Pred) ? Instruction::FCmp : Instruction::ICmp;
11271129
auto *VecTy = dyn_cast<FixedVectorType>(X->getType());

llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,22 @@ define i1 @icmp_xor_v4i32(<4 x i32> %a) {
6666
ret i1 %r
6767
}
6868

69+
define i1 @icmp_samesign_xor_v4i32(<4 x i32> %a) {
70+
; CHECK-LABEL: @icmp_samesign_xor_v4i32(
71+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], <i32 poison, i32 -8, i32 poison, i32 42>
72+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
73+
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
74+
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
75+
; CHECK-NEXT: ret i1 [[R]]
76+
;
77+
%e1 = extractelement <4 x i32> %a, i32 3
78+
%e2 = extractelement <4 x i32> %a, i32 1
79+
%cmp1 = icmp samesign ugt i32 %e1, 42
80+
%cmp2 = icmp sgt i32 %e2, -8
81+
%r = xor i1 %cmp1, %cmp2
82+
ret i1 %r
83+
}
84+
6985
; add is not canonical (should be xor), but that is ok.
7086

7187
define i1 @icmp_add_v8i32(<8 x i32> %a) {
@@ -146,6 +162,27 @@ define i1 @icmp_xor_v4i32_multiuse(<4 x i32> %a) {
146162
ret i1 %r
147163
}
148164

165+
define i1 @icmp_samesign_xor_v4i32_multiuse(<4 x i32> %a) {
166+
; CHECK-LABEL: @icmp_samesign_xor_v4i32_multiuse(
167+
; CHECK-NEXT: [[E2:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 1
168+
; CHECK-NEXT: call void @use(i32 [[E2]])
169+
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
170+
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
171+
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
172+
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
173+
; CHECK-NEXT: call void @use(i1 [[R]])
174+
; CHECK-NEXT: ret i1 [[R]]
175+
;
176+
%e1 = extractelement <4 x i32> %a, i32 3
177+
%e2 = extractelement <4 x i32> %a, i32 1
178+
call void @use(i32 %e2)
179+
%cmp1 = icmp sgt i32 %e1, 42
180+
%cmp2 = icmp samesign ugt i32 %e2, -8
181+
%r = xor i1 %cmp1, %cmp2
182+
call void @use(i1 %r)
183+
ret i1 %r
184+
}
185+
149186
; Negative test - this could CSE/simplify.
150187

151188
define i1 @same_extract_index(<4 x i32> %a) {

0 commit comments

Comments
 (0)