Skip to content

Commit 3b6edef

Browse files
committed
[SLP]Fix a crash when reorder masked gather nodes with reused scalars.
If the masked gather nodes must be reordered, we can just reorder the scalars, just like for gather nodes. But if the node contains reused scalars, it must be handled the same way as a regular vectorizable node, since we need to reorder the reuse mask, not the scalars directly. Differential Revision: https://reviews.llvm.org/D128360
1 parent df6afee commit 3b6edef

File tree

2 files changed

+86
-1
lines changed

2 files changed

+86
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3940,7 +3940,9 @@ bool BoUpSLP::canReorderOperands(
39403940
// Add ScatterVectorize nodes to the list of operands, where just
39413941
// reordering of the scalars is required. Similar to the gathers, so
39423942
// simply add to the list of gathered ops.
3943-
if (TE->State != TreeEntry::Vectorize)
3943+
// If there are reused scalars, process this node as a regular vectorize
3944+
// node, just reorder reuses mask.
3945+
if (TE->State != TreeEntry::Vectorize && TE->ReuseShuffleIndices.empty())
39443946
GatherOps.push_back(TE);
39453947
continue;
39463948
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -slp-vectorizer -mattr=+avx512f -mtriple=x86_64 -S < %s | FileCheck %s
3+
4+
; Regression test for D128360: reordering a masked-gather (ScatterVectorize)
; node that has reused scalars must reorder the reuse mask rather than the
; scalars directly; previously this crashed the SLP vectorizer.
; The CHECK lines pin the expected shape: one 8-wide masked gather of the
; scattered %p offsets, re-expanded to 16 lanes by a reuse shuffle whose mask
; repeats lanes (e.g. 0, 1, 2 appear multiple times).
define void @test(float* noalias %0, float* %p) {
5+
; CHECK-LABEL: @test(
6+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x float*> poison, float* [[P:%.*]], i32 0
7+
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float*> [[TMP2]], <8 x float*> poison, <8 x i32> zeroinitializer
8+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, <8 x float*> [[SHUFFLE]], <8 x i64> <i64 15, i64 4, i64 5, i64 0, i64 2, i64 6, i64 7, i64 8>
9+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP0:%.*]], i64 2
10+
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> [[TMP3]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef)
11+
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x float> [[TMP5]], <8 x float> poison, <16 x i32> <i32 4, i32 3, i32 0, i32 1, i32 2, i32 0, i32 1, i32 2, i32 0, i32 2, i32 5, i32 6, i32 7, i32 5, i32 6, i32 7>
12+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x float> <float poison, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, <16 x float> [[SHUFFLE1]], <16 x i32> <i32 18, i32 19, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
13+
; CHECK-NEXT: [[TMP7:%.*]] = fadd reassoc nsz arcp contract afn <16 x float> [[SHUFFLE1]], [[TMP6]]
14+
; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x float> [[TMP7]], <16 x float> poison, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 9, i32 0, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
15+
; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[TMP4]] to <16 x float>*
16+
; CHECK-NEXT: store <16 x float> [[SHUFFLE2]], <16 x float>* [[TMP8]], align 4
17+
; CHECK-NEXT: ret void
18+
;
19+
; The loads %4, %6, %8, %32, %36, %40 read scattered offsets of %p
; (4, 5, 15, 6, 7, 8); each loaded value feeds multiple fadds below, so the
; vectorized gather node ends up with reused scalars — the case the fix
; handles via the reuse mask.
%2 = getelementptr inbounds float, float* %p, i64 2
20+
%3 = getelementptr inbounds float, float* %p, i64 4
21+
%4 = load float, float* %3, align 4
22+
%5 = getelementptr inbounds float, float* %p, i64 5
23+
%6 = load float, float* %5, align 16
24+
%7 = getelementptr inbounds float, float* %p, i64 15
25+
%8 = load float, float* %7, align 4
26+
%9 = fadd reassoc nsz arcp contract afn float %8, 0.000000e+00
27+
%10 = getelementptr inbounds float, float* %0, i64 2
28+
store float %9, float* %10, align 4
29+
%11 = fadd reassoc nsz arcp contract afn float %4, 0.000000e+00
30+
%12 = getelementptr inbounds float, float* %0, i64 3
31+
store float %11, float* %12, align 4
32+
%13 = fadd reassoc nsz arcp contract afn float %6, 0.000000e+00
33+
%14 = getelementptr inbounds float, float* %0, i64 4
34+
store float %13, float* %14, align 4
35+
%15 = fadd reassoc nsz arcp contract afn float %8, 0.000000e+00
36+
%16 = getelementptr inbounds float, float* %0, i64 5
37+
store float %15, float* %16, align 4
38+
%17 = fadd reassoc nsz arcp contract afn float %4, 0.000000e+00
39+
%18 = load float, float* %p, align 16
40+
%19 = getelementptr inbounds float, float* %0, i64 6
41+
store float %17, float* %19, align 4
42+
%20 = fadd reassoc nsz arcp contract afn float %6, 0.000000e+00
43+
%21 = getelementptr inbounds float, float* %0, i64 7
44+
store float %20, float* %21, align 4
45+
%22 = fadd reassoc nsz arcp contract afn float %8, 0.000000e+00
46+
%23 = load float, float* %2, align 8
47+
%24 = getelementptr inbounds float, float* %0, i64 8
48+
store float %22, float* %24, align 4
49+
%25 = fadd reassoc nsz arcp contract afn float %4, %18
50+
%26 = getelementptr inbounds float, float* %0, i64 9
51+
store float %25, float* %26, align 4
52+
%27 = fadd reassoc nsz arcp contract afn float %6, 0.000000e+00
53+
%28 = getelementptr inbounds float, float* %0, i64 10
54+
store float %27, float* %28, align 4
55+
%29 = fadd reassoc nsz arcp contract afn float %8, %23
56+
%30 = getelementptr inbounds float, float* %0, i64 11
57+
store float %29, float* %30, align 4
58+
%31 = getelementptr inbounds float, float* %p, i64 6
59+
%32 = load float, float* %31, align 4
60+
%33 = fadd reassoc nsz arcp contract afn float %32, 0.000000e+00
61+
%34 = getelementptr inbounds float, float* %0, i64 12
62+
store float %33, float* %34, align 4
63+
%35 = getelementptr inbounds float, float* %p, i64 7
64+
%36 = load float, float* %35, align 8
65+
%37 = fadd reassoc nsz arcp contract afn float %36, 0.000000e+00
66+
%38 = getelementptr inbounds float, float* %0, i64 13
67+
store float %37, float* %38, align 4
68+
%39 = getelementptr inbounds float, float* %p, i64 8
69+
%40 = load float, float* %39, align 4
70+
%41 = fadd reassoc nsz arcp contract afn float %40, 0.000000e+00
71+
%42 = getelementptr inbounds float, float* %0, i64 14
72+
store float %41, float* %42, align 4
73+
%43 = fadd reassoc nsz arcp contract afn float %32, 0.000000e+00
74+
%44 = getelementptr inbounds float, float* %0, i64 15
75+
store float %43, float* %44, align 4
76+
%45 = fadd reassoc nsz arcp contract afn float %36, 0.000000e+00
77+
%46 = getelementptr inbounds float, float* %0, i64 16
78+
store float %45, float* %46, align 4
79+
%47 = fadd reassoc nsz arcp contract afn float %40, 0.000000e+00
80+
%48 = getelementptr inbounds float, float* %0, i64 17
81+
store float %47, float* %48, align 4
82+
ret void
83+
}

0 commit comments

Comments
 (0)