Skip to content

Commit 8e570ab

Browse files
markschimmel authored and Meinersbur committed
Polly - specify address space when creating a pointer to a vector type
Polly incorrectly dropped the address space specified for a load instruction when it vectorized the code.

Reviewed By: Meinersbur

Differential Revision: https://reviews.llvm.org/D88907
1 parent fc2fb60 commit 8e570ab

File tree

3 files changed

+97
-7
lines changed

3 files changed

+97
-7
lines changed

polly/include/polly/CodeGen/BlockGenerators.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -662,7 +662,7 @@ class VectorBlockGenerator : BlockGenerator {
662662
Value *getVectorValue(ScopStmt &Stmt, Value *Old, ValueMapT &VectorMap,
663663
VectorValueMapT &ScalarMaps, Loop *L);
664664

665-
Type *getVectorPtrTy(const Value *V, int Width);
665+
Type *getVectorPtrTy(const Value *V, int Width, unsigned AddrSpace);
666666

667667
/// Load a vector from a set of adjacent scalars
668668
///

polly/lib/CodeGen/BlockGenerators.cpp

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1037,22 +1037,24 @@ Value *VectorBlockGenerator::getVectorValue(ScopStmt &Stmt, Value *Old,
10371037
return Vector;
10381038
}
10391039

1040-
Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width) {
1040+
Type *VectorBlockGenerator::getVectorPtrTy(const Value *Val, int Width,
1041+
unsigned AddrSpace) {
10411042
PointerType *PointerTy = dyn_cast<PointerType>(Val->getType());
10421043
assert(PointerTy && "PointerType expected");
10431044

10441045
Type *ScalarType = PointerTy->getElementType();
10451046
auto *FVTy = FixedVectorType::get(ScalarType, Width);
10461047

1047-
return PointerType::getUnqual(FVTy);
1048+
return PointerType::get(FVTy, AddrSpace);
10481049
}
10491050

10501051
Value *VectorBlockGenerator::generateStrideOneLoad(
10511052
ScopStmt &Stmt, LoadInst *Load, VectorValueMapT &ScalarMaps,
10521053
__isl_keep isl_id_to_ast_expr *NewAccesses, bool NegativeStride = false) {
10531054
unsigned VectorWidth = getVectorWidth();
10541055
auto *Pointer = Load->getPointerOperand();
1055-
Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
1056+
auto AS = Pointer->getType()->getPointerAddressSpace();
1057+
Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth, AS);
10561058
unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;
10571059

10581060
Value *NewPointer = generateLocationAccessed(Stmt, Load, ScalarMaps[Offset],
@@ -1081,7 +1083,8 @@ Value *VectorBlockGenerator::generateStrideZeroLoad(
10811083
ScopStmt &Stmt, LoadInst *Load, ValueMapT &BBMap,
10821084
__isl_keep isl_id_to_ast_expr *NewAccesses) {
10831085
auto *Pointer = Load->getPointerOperand();
1084-
Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
1086+
auto AS = Pointer->getType()->getPointerAddressSpace();
1087+
Type *VectorPtrType = getVectorPtrTy(Pointer, 1, AS);
10851088
Value *NewPointer =
10861089
generateLocationAccessed(Stmt, Load, BBMap, VLTS[0], NewAccesses);
10871090
Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
@@ -1201,7 +1204,8 @@ void VectorBlockGenerator::copyStore(
12011204
extractScalarValues(Store, VectorMap, ScalarMaps);
12021205

12031206
if (Access.isStrideOne(isl::manage_copy(Schedule))) {
1204-
Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth());
1207+
auto AS = Pointer->getType()->getPointerAddressSpace();
1208+
Type *VectorPtrType = getVectorPtrTy(Pointer, getVectorWidth(), AS);
12051209
Value *NewPointer = generateLocationAccessed(Stmt, Store, ScalarMaps[0],
12061210
VLTS[0], NewAccesses);
12071211

@@ -1339,7 +1343,8 @@ void VectorBlockGenerator::generateScalarVectorLoads(
13391343
continue;
13401344

13411345
auto *Address = getOrCreateAlloca(*MA);
1342-
Type *VectorPtrType = getVectorPtrTy(Address, 1);
1346+
auto AS = Address->getType()->getPointerAddressSpace();
1347+
Type *VectorPtrType = getVectorPtrTy(Address, 1, AS);
13431348
Value *VectorPtr = Builder.CreateBitCast(Address, VectorPtrType,
13441349
Address->getName() + "_p_vec_p");
13451350
auto *Val = Builder.CreateLoad(VectorPtr, Address->getName() + ".reload");
Lines changed: 85 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,85 @@
1+
; RUN: opt %loadPolly -polly-vectorizer=polly -polly-opt-isl -polly-codegen -S < %s | FileCheck %s
2+
;
3+
; Polly crashed during codegen with an assertion error while trying to generate
4+
; a pointer bitcast from a pointer having an address space to one without
5+
;
6+
; CHECK-LABEL: entry:
7+
; CHECK: load <4 x float>, <4 x float> addrspace(4)*
8+
;
9+
; ModuleID = '/tmp/lud.bc'
10+
source_filename = "lud.c"
11+
; This datalayout was for a 32-bit ARC processor with 512-bit vector extension
12+
target datalayout = "e-m:e-p:32:32-p1:32:32-p3:32:32-p5:32:32-i64:32-f64:32-v64:32-v128:32-a:0:32-v256:32-v512:32-n8:16:32"
13+
; Specify x86 because the ARC backend is still experimental and not built by default
14+
target triple = "x86_64-unknown-unknown"
15+
16+
; Function Attrs: noinline nounwind
17+
define void @LU_decomp_kij_opt(i32 %n, i32 %lda, float addrspace(4)* %A, float addrspace(4)* %scratch) #0 {
18+
entry:
19+
%cmp34 = icmp sgt i32 %n, 0
20+
br i1 %cmp34, label %for.body.lr.ph, label %for.end34
21+
22+
for.body.lr.ph: ; preds = %entry
23+
%0 = add nsw i32 %n, -1
24+
br label %for.body
25+
26+
for.body: ; preds = %for.inc32, %for.body.lr.ph
27+
%k.035 = phi i32 [ 0, %for.body.lr.ph ], [ %add2, %for.inc32 ]
28+
%mul = mul nsw i32 %k.035, %lda
29+
%add = add nsw i32 %mul, %k.035
30+
%arrayidx = getelementptr inbounds float, float addrspace(4)* %A, i32 %add
31+
%1 = load float, float addrspace(4)* %arrayidx, align 4
32+
%conv1 = fdiv arcp float 1.000000e+00, %1
33+
%add2 = add nuw nsw i32 %k.035, 1
34+
%exitcond37 = icmp eq i32 %k.035, %0
35+
br i1 %exitcond37, label %for.end34, label %for.body6.lr.ph
36+
37+
for.body6.lr.ph: ; preds = %for.body
38+
br label %for.body6
39+
40+
for.body6: ; preds = %for.inc29, %for.body6.lr.ph
41+
%i.033 = phi i32 [ %add2, %for.body6.lr.ph ], [ %inc30, %for.inc29 ]
42+
%mul7 = mul nsw i32 %i.033, %lda
43+
%add8 = add nsw i32 %mul7, %k.035
44+
%arrayidx9 = getelementptr inbounds float, float addrspace(4)* %A, i32 %add8
45+
%2 = load float, float addrspace(4)* %arrayidx9, align 4
46+
%mul10 = fmul arcp contract float %conv1, %2
47+
store float %mul10, float addrspace(4)* %arrayidx9, align 4
48+
br label %for.body18
49+
50+
for.body18: ; preds = %for.body18, %for.body6
51+
%j.031 = phi i32 [ %add2, %for.body6 ], [ %inc, %for.body18 ]
52+
%3 = load float, float addrspace(4)* %arrayidx9, align 4
53+
%add23 = add nsw i32 %j.031, %mul
54+
%arrayidx24 = getelementptr inbounds float, float addrspace(4)* %A, i32 %add23
55+
%4 = load float, float addrspace(4)* %arrayidx24, align 4
56+
%mul25 = fmul arcp contract float %3, %4
57+
%add27 = add nsw i32 %j.031, %mul7
58+
%arrayidx28 = getelementptr inbounds float, float addrspace(4)* %A, i32 %add27
59+
%5 = load float, float addrspace(4)* %arrayidx28, align 4
60+
%sub = fsub arcp contract float %5, %mul25
61+
store float %sub, float addrspace(4)* %arrayidx28, align 4
62+
%inc = add nuw nsw i32 %j.031, 1
63+
%exitcond = icmp eq i32 %inc, %n
64+
br i1 %exitcond, label %for.inc29, label %for.body18
65+
66+
for.inc29: ; preds = %for.body18
67+
%inc30 = add nuw nsw i32 %i.033, 1
68+
%exitcond36 = icmp eq i32 %inc30, %n
69+
br i1 %exitcond36, label %for.inc32, label %for.body6
70+
71+
for.inc32: ; preds = %for.inc29
72+
br label %for.body
73+
74+
for.end34: ; preds = %for.body, %entry
75+
ret void
76+
}
77+
78+
attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
79+
80+
!llvm.module.flags = !{!0, !1}
81+
!llvm.ident = !{!2}
82+
83+
!0 = !{i32 1, !"ArcIntrinsicCheck", i32 18224056}
84+
!1 = !{i32 1, !"wchar_size", i32 2}
85+
!2 = !{!"clang version 10.0.1 "}

0 commit comments

Comments
 (0)