Skip to content

Commit e3a0775

Browse files
committed
[VectorCombine] foldExtractedCmps - (re-)enable fold on non-commutative binops
#114901 exposed that foldExtractedCmps didn't account for non-commutative binops, so the fold was disabled for them by 05e838f. This patch re-enables support for non-commutative binops by ensuring that the LHS/RHS arg order of the binop is retained.
1 parent 38fffa6 commit e3a0775

File tree

3 files changed

+16
-19
lines changed

3 files changed

+16
-19
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,10 +1039,6 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
10391039
if (!BI || !I.getType()->isIntegerTy(1))
10401040
return false;
10411041

1042-
// TODO: Support non-commutative binary ops.
1043-
if (!BI->isCommutative())
1044-
return false;
1045-
10461042
// The compare predicates should match, and each compare should have a
10471043
// constant operand.
10481044
Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
@@ -1066,6 +1062,8 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
10661062
ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
10671063
if (!ConvertToShuf)
10681064
return false;
1065+
assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1066+
"Unknown ExtractElementInst");
10691067

10701068
// The original scalar pattern is:
10711069
// binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
@@ -1117,9 +1115,10 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
11171115
CmpC[Index0] = C0;
11181116
CmpC[Index1] = C1;
11191117
Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));
1120-
11211118
Value *Shuf = createShiftShuffle(VCmp, ExpensiveIndex, CheapIndex, Builder);
1122-
Value *VecLogic = Builder.CreateBinOp(BI->getOpcode(), VCmp, Shuf);
1119+
Value *LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1120+
Value *RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1121+
Value *VecLogic = Builder.CreateBinOp(BI->getOpcode(), LHS, RHS);
11231122
Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
11241123
replaceValue(I, *NewExt);
11251124
++NumVecCmpBO;

llvm/test/Transforms/VectorCombine/X86/extract-cmp-binop.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ define i1 @icmp_xor_v4i32(<4 x i32> %a) {
5454
; CHECK-LABEL: @icmp_xor_v4i32(
5555
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], <i32 poison, i32 -8, i32 poison, i32 42>
5656
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
57-
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], [[SHIFT]]
57+
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
5858
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
5959
; CHECK-NEXT: ret i1 [[R]]
6060
;
@@ -80,7 +80,7 @@ define i1 @icmp_add_v8i32(<8 x i32> %a) {
8080
; AVX-LABEL: @icmp_add_v8i32(
8181
; AVX-NEXT: [[TMP1:%.*]] = icmp eq <8 x i32> [[A:%.*]], <i32 poison, i32 poison, i32 -8, i32 poison, i32 poison, i32 poison, i32 poison, i32 42>
8282
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <8 x i32> <i32 poison, i32 poison, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
83-
; AVX-NEXT: [[TMP2:%.*]] = add <8 x i1> [[TMP1]], [[SHIFT]]
83+
; AVX-NEXT: [[TMP2:%.*]] = add <8 x i1> [[SHIFT]], [[TMP1]]
8484
; AVX-NEXT: [[R:%.*]] = extractelement <8 x i1> [[TMP2]], i64 2
8585
; AVX-NEXT: ret i1 [[R]]
8686
;
@@ -131,7 +131,7 @@ define i1 @icmp_xor_v4i32_multiuse(<4 x i32> %a) {
131131
; CHECK-NEXT: call void @use(i32 [[E2]])
132132
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
133133
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
134-
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], [[SHIFT]]
134+
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
135135
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
136136
; CHECK-NEXT: call void @use(i1 [[R]])
137137
; CHECK-NEXT: ret i1 [[R]]

llvm/test/Transforms/VectorCombine/X86/pr114901.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,10 @@ define i1 @PR114901(<4 x i32> %a) {
1515
;
1616
; AVX-LABEL: define i1 @PR114901(
1717
; AVX-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
18-
; AVX-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[A]], i32 1
19-
; AVX-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[A]], i32 3
20-
; AVX-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[E1]], -8
21-
; AVX-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[E3]], 42
22-
; AVX-NEXT: [[R:%.*]] = ashr i1 [[CMP3]], [[CMP1]]
18+
; AVX-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
19+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
20+
; AVX-NEXT: [[TMP2:%.*]] = ashr <4 x i1> [[SHIFT]], [[TMP1]]
21+
; AVX-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
2322
; AVX-NEXT: ret i1 [[R]]
2423
;
2524
%e1 = extractelement <4 x i32> %a, i32 1
@@ -42,11 +41,10 @@ define i1 @PR114901_flip(<4 x i32> %a) {
4241
;
4342
; AVX-LABEL: define i1 @PR114901_flip(
4443
; AVX-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
45-
; AVX-NEXT: [[E1:%.*]] = extractelement <4 x i32> [[A]], i32 1
46-
; AVX-NEXT: [[E3:%.*]] = extractelement <4 x i32> [[A]], i32 3
47-
; AVX-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[E1]], -8
48-
; AVX-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[E3]], 42
49-
; AVX-NEXT: [[R:%.*]] = ashr i1 [[CMP1]], [[CMP3]]
44+
; AVX-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
45+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
46+
; AVX-NEXT: [[TMP2:%.*]] = ashr <4 x i1> [[TMP1]], [[SHIFT]]
47+
; AVX-NEXT: [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
5048
; AVX-NEXT: ret i1 [[R]]
5149
;
5250
%e1 = extractelement <4 x i32> %a, i32 1

0 commit comments

Comments
 (0)