Skip to content

Commit cd29126

Browse files
[SLP]Fix PR87133: crash because of different altopcodes for cmps after reordering.
If a node contains cmp instructions with 3 or more different but swappable predicates, we need to keep the same kind of main/alternate opcodes to avoid incorrect detection of the opcodes after reordering. Reordering changes the order of the instructions, so we may erroneously treat swappable opcodes as non-compatible/alternate, which may lead to a later compiler crash. Reviewers: RKSimon Reviewed By: RKSimon Pull Request: #87267
1 parent 23616c6 commit cd29126

File tree

3 files changed

+104
-23
lines changed

3 files changed

+104
-23
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,29 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
658658
unsigned AltOpcode = Opcode;
659659
unsigned AltIndex = BaseIndex;
660660

661+
bool SwappedPredsCompatible = [&]() {
662+
if (!IsCmpOp)
663+
return false;
664+
SetVector<unsigned> UniquePreds, UniqueNonSwappedPreds;
665+
UniquePreds.insert(BasePred);
666+
UniqueNonSwappedPreds.insert(BasePred);
667+
for (Value *V : VL) {
668+
auto *I = dyn_cast<CmpInst>(V);
669+
if (!I)
670+
return false;
671+
CmpInst::Predicate CurrentPred = I->getPredicate();
672+
CmpInst::Predicate SwappedCurrentPred =
673+
CmpInst::getSwappedPredicate(CurrentPred);
674+
UniqueNonSwappedPreds.insert(CurrentPred);
675+
if (!UniquePreds.contains(CurrentPred) &&
676+
!UniquePreds.contains(SwappedCurrentPred))
677+
UniquePreds.insert(CurrentPred);
678+
}
679+
// The total number of distinct predicates is > 2, but if swapped predicates
680+
// are considered compatible there are only 2, so treat swappable predicates
681+
// as compatible opcodes, not as alternate ones.
682+
return UniqueNonSwappedPreds.size() > 2 && UniquePreds.size() == 2;
683+
}();
661684
// Check for one alternate opcode from another BinaryOperator.
662685
// TODO - generalize to support all operators (types, calls etc.).
663686
auto *IBase = cast<Instruction>(VL[BaseIndex]);
@@ -710,7 +733,7 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
710733
CmpInst::Predicate SwappedCurrentPred =
711734
CmpInst::getSwappedPredicate(CurrentPred);
712735

713-
if (E == 2 &&
736+
if ((E == 2 || SwappedPredsCompatible) &&
714737
(BasePred == CurrentPred || BasePred == SwappedCurrentPred))
715738
continue;
716739

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i32 @test(ptr %sptr, i64 %0) {
5+
; CHECK-LABEL: define i32 @test(
6+
; CHECK-SAME: ptr [[SPTR:%.*]], i64 [[TMP0:%.*]]) {
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP0]] to i32
9+
; CHECK-NEXT: [[IV2:%.*]] = getelementptr i8, ptr [[SPTR]], i64 4
10+
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[IV2]], align 4
11+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[CONV_I]], i32 1
12+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 1>
13+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 1, i32 5, i32 1, i32 poison>
14+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> <i32 poison, i32 poison, i32 poison, i32 0>, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
15+
; CHECK-NEXT: [[TMP6:%.*]] = icmp sle <4 x i32> [[TMP3]], [[TMP5]]
16+
; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[TMP3]], [[TMP5]]
17+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 7>
18+
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
19+
; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
20+
; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP10]])
21+
; CHECK-NEXT: [[AND33:%.*]] = zext i1 [[TMP11]] to i32
22+
; CHECK-NEXT: ret i32 [[AND33]]
23+
;
24+
entry:
25+
%conv.i = trunc i64 %0 to i32
26+
%iv2 = getelementptr i8, ptr %sptr, i64 4
27+
%1 = load i32, ptr %iv2, align 4
28+
%cmp11 = icmp slt i32 %1, %conv.i
29+
%cmp.i57 = icmp eq i32 %1, 0
30+
%or.i5977 = or i1 %cmp.i57, %cmp11
31+
%iv4 = getelementptr i8, ptr %sptr, i64 12
32+
%2 = load i32, ptr %iv4, align 4
33+
%cmp16 = icmp sle i32 %2, %conv.i
34+
%cmp.i62 = icmp eq i32 %2, 0
35+
%or.i6478 = or i1 %cmp.i62, %cmp16
36+
%iv3 = getelementptr i8, ptr %sptr, i64 8
37+
%3 = load i32, ptr %iv3, align 8
38+
%cmp21 = icmp sgt i32 %3, %conv.i
39+
%cmp.i67 = icmp eq i32 %3, 0
40+
%or.i6979 = or i1 %cmp.i67, %cmp21
41+
%iv5 = getelementptr i8, ptr %sptr, i64 16
42+
%4 = load i32, ptr %iv5, align 8
43+
%cmp26 = icmp slt i32 %conv.i, 0
44+
%cmp.i72 = icmp eq i32 %4, 0
45+
%or.i7480 = or i1 %cmp.i72, %cmp26
46+
%and3183 = and i1 %or.i5977, %or.i6478
47+
%and3284 = and i1 %and3183, %or.i6979
48+
%and3385 = and i1 %and3284, %or.i7480
49+
%and33 = zext i1 %and3385 to i32
50+
ret i32 %and33
51+
}

llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -94,17 +94,13 @@ define i1 @logical_or_fcmp(<4 x float> %x) {
9494

9595
define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) {
9696
; SSE-LABEL: @logical_and_icmp_diff_preds(
97-
; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
98-
; SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2
99-
; SSE-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0
100-
; SSE-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0
101-
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 3, i32 1>
102-
; SSE-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer
103-
; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
104-
; SSE-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[TMP3]], i1 false
105-
; SSE-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false
106-
; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
107-
; SSE-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP4]], i1 false
97+
; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 1, i32 3, i32 6, i32 0>
98+
; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
99+
; SSE-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]]
100+
; SSE-NEXT: [[TMP4:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]]
101+
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
102+
; SSE-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
103+
; SSE-NEXT: [[S3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]])
108104
; SSE-NEXT: ret i1 [[S3]]
109105
;
110106
; AVX-LABEL: @logical_and_icmp_diff_preds(
@@ -391,17 +387,28 @@ define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
391387
}
392388

393389
define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) {
394-
; CHECK-LABEL: @logical_and_icmp_clamp_pred_diff(
395-
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
396-
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[X]], <i32 42, i32 42, i32 42, i32 42>
397-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
398-
; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
399-
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
400-
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
401-
; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP3]]
402-
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP7]])
403-
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP8]], i1 false
404-
; CHECK-NEXT: ret i1 [[OP_RDX]]
390+
; SSE-LABEL: @logical_and_icmp_clamp_pred_diff(
391+
; SSE-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
392+
; SSE-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[X]], <i32 42, i32 42, i32 42, i32 42>
393+
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
394+
; SSE-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
395+
; SSE-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
396+
; SSE-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
397+
; SSE-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP3]]
398+
; SSE-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP7]])
399+
; SSE-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP8]], i1 false
400+
; SSE-NEXT: ret i1 [[OP_RDX]]
401+
;
402+
; AVX-LABEL: @logical_and_icmp_clamp_pred_diff(
403+
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 2, i32 3>
404+
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 42, i32 42, i32 42, i32 poison>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 3>
405+
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> <i32 17, i32 17, i32 17, i32 17, i32 poison, i32 poison, i32 poison, i32 42>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
406+
; AVX-NEXT: [[TMP4:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[TMP3]]
407+
; AVX-NEXT: [[TMP5:%.*]] = icmp ult <8 x i32> [[TMP2]], [[TMP3]]
408+
; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15>
409+
; AVX-NEXT: [[TMP7:%.*]] = freeze <8 x i1> [[TMP6]]
410+
; AVX-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP7]])
411+
; AVX-NEXT: ret i1 [[TMP8]]
405412
;
406413
%x0 = extractelement <4 x i32> %x, i32 0
407414
%x1 = extractelement <4 x i32> %x, i32 1

0 commit comments

Comments
 (0)