Skip to content

Commit 0c18def

Browse files
committed
[SLP] Allow interleaving check only if the interleave factor does not exceed the number of elements
We need to check that the interleaving factor is not greater than the total number of elements in the loads slice (i.e. InterleaveFactor <= Slice.size()) in order to handle it correctly and avoid a compiler crash. Fixes the crash reported in #112361 (comment)
1 parent b8f9063 commit 0c18def

File tree

2 files changed

+76
-1
lines changed

2 files changed

+76
-1
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7046,7 +7046,8 @@ void BoUpSLP::tryToVectorizeGatheredLoads(
70467046
OrdersType Order;
70477047
SmallVector<Value *> PointerOps;
70487048
// Segmented load detected - vectorize at maximum vector factor.
7049-
if (TTI.isLegalInterleavedAccessType(
7049+
if (InterleaveFactor <= Slice.size() &&
7050+
TTI.isLegalInterleavedAccessType(
70507051
getWidenedType(Slice.front()->getType(), VF),
70517052
InterleaveFactor,
70527053
cast<LoadInst>(Slice.front())->getAlign(),
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux -mattr=+v,+zvl128b < %s | FileCheck %s
3+
4+
define void @test(ptr %a, float %0) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: ptr [[A:%.*]], float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8
9+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i8, ptr [[TMP1]], i64 84
10+
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
11+
; CHECK-NEXT: [[TMP3:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP2]], float 0.000000e+00, float 0.000000e+00)
12+
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr i8, ptr [[TMP1]], i64 28
13+
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
14+
; CHECK-NEXT: [[TMP5:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP4]], float 0.000000e+00, float [[TMP3]])
15+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 8
16+
; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
17+
; CHECK-NEXT: [[TMP7:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP6]], float 0.000000e+00, float 0.000000e+00)
18+
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 68
19+
; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4
20+
; CHECK-NEXT: [[TMP9:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP8]], float 0.000000e+00, float [[TMP5]])
21+
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr i8, ptr [[TMP1]], i64 88
22+
; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
23+
; CHECK-NEXT: [[TMP11:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP10]], float 0.000000e+00, float [[TMP7]])
24+
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr i8, ptr [[TMP1]], i64 92
25+
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX5]], align 4
26+
; CHECK-NEXT: [[TMP13:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP12]], float 0.000000e+00, float [[TMP11]])
27+
; CHECK-NEXT: [[TMP14:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float 0.000000e+00, float [[TMP9]])
28+
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr i8, ptr [[TMP1]], i64 96
29+
; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
30+
; CHECK-NEXT: [[TMP16:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP15]], float 0.000000e+00, float [[TMP13]])
31+
; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr i8, ptr [[TMP1]], i64 80
32+
; CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX7]], align 4
33+
; CHECK-NEXT: [[TMP18:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float [[TMP17]], float [[TMP16]])
34+
; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr i8, ptr [[TMP1]], i64 100
35+
; CHECK-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX8]], align 4
36+
; CHECK-NEXT: [[TMP20:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP19]], float 0.000000e+00, float [[TMP14]])
37+
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP18]], [[TMP20]]
38+
; CHECK-NEXT: store float [[ADD]], ptr [[A]], align 4
39+
; CHECK-NEXT: ret void
40+
;
41+
entry:
42+
%1 = load ptr, ptr %a, align 8
43+
%arrayidx = getelementptr i8, ptr %1, i64 84
44+
%2 = load float, ptr %arrayidx, align 4
45+
%3 = tail call float @llvm.fmuladd.f32(float %2, float 0.000000e+00, float 0.000000e+00)
46+
%arrayidx1 = getelementptr i8, ptr %1, i64 28
47+
%4 = load float, ptr %arrayidx1, align 4
48+
%5 = tail call float @llvm.fmuladd.f32(float %4, float 0.000000e+00, float %3)
49+
%arrayidx2 = getelementptr i8, ptr %1, i64 8
50+
%6 = load float, ptr %arrayidx2, align 4
51+
%7 = tail call float @llvm.fmuladd.f32(float %6, float 0.000000e+00, float 0.000000e+00)
52+
%arrayidx3 = getelementptr i8, ptr %1, i64 68
53+
%8 = load float, ptr %arrayidx3, align 4
54+
%9 = tail call float @llvm.fmuladd.f32(float %8, float 0.000000e+00, float %5)
55+
%arrayidx4 = getelementptr i8, ptr %1, i64 88
56+
%10 = load float, ptr %arrayidx4, align 4
57+
%11 = tail call float @llvm.fmuladd.f32(float %10, float 0.000000e+00, float %7)
58+
%arrayidx5 = getelementptr i8, ptr %1, i64 92
59+
%12 = load float, ptr %arrayidx5, align 4
60+
%13 = tail call float @llvm.fmuladd.f32(float %12, float 0.000000e+00, float %11)
61+
%14 = tail call float @llvm.fmuladd.f32(float %0, float 0.000000e+00, float %9)
62+
%arrayidx6 = getelementptr i8, ptr %1, i64 96
63+
%15 = load float, ptr %arrayidx6, align 4
64+
%16 = tail call float @llvm.fmuladd.f32(float %15, float 0.000000e+00, float %13)
65+
%arrayidx7 = getelementptr i8, ptr %1, i64 80
66+
%17 = load float, ptr %arrayidx7, align 4
67+
%18 = tail call float @llvm.fmuladd.f32(float %0, float %17, float %16)
68+
%arrayidx8 = getelementptr i8, ptr %1, i64 100
69+
%19 = load float, ptr %arrayidx8, align 4
70+
%20 = tail call float @llvm.fmuladd.f32(float %19, float 0.000000e+00, float %14)
71+
%add = fadd float %18, %20
72+
store float %add, ptr %a, align 4
73+
ret void
74+
}

0 commit comments

Comments
 (0)