Skip to content

Commit fa3307e

Browse files
authored
[polly] Make reduction detection checks more robust - part 1 (#75297)
The existing reduction detection algorithm performs two kinds of memory checks before marking a load/store pair as a reduction. The first checks whether the load and the store point to the same memory. Currently, this check wrongly detects the following case as a reduction: sum[0] = sum[1] + A[i]. This is because the check compares only the base of the memory addresses involved, not their indices. This patch fixes the issue and introduces some debug prints. It also adds a couple of test cases to verify the functionality of the patch.
1 parent ab70ac6 commit fa3307e

File tree

3 files changed

+107
-3
lines changed

3 files changed

+107
-3
lines changed

polly/lib/Analysis/ScopBuilder.cpp

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2536,18 +2536,39 @@ bool hasIntersectingAccesses(isl::set AllAccs, MemoryAccess *LoadMA,
25362536
/// Check whether the load/store pair \p LoadMA / \p StoreMA passes the memory
/// checks required of a reduction candidate pair.
///
/// Three checks run in order, each one only if the previous one passed:
///  1. the two access relations have equal spaces (has_equal_space);
///  2. the ranges of the two access relations, each restricted to \p Domain,
///     have a non-empty intersection;
///  3. hasIntersectingAccesses() returns false for the range of the united
///     access relations (presumably it compares against the other accesses
///     in \p MemAccs -- confirm against that helper).
///
/// \returns true only if all three checks pass.
bool checkCandidatePairAccesses(MemoryAccess *LoadMA, MemoryAccess *StoreMA,
25372537
isl::set Domain,
25382538
SmallVector<MemoryAccess *, 8> &MemAccs) {
2539+
// First check if the base value is the same.
25392540
isl::map LoadAccs = LoadMA->getAccessRelation();
25402541
isl::map StoreAccs = StoreMA->getAccessRelation();
2541-
2542-
// Skip those with obviously unequal base addresses.
25432542
bool Valid = LoadAccs.has_equal_space(StoreAccs);
2543+
LLVM_DEBUG(dbgs() << " == The accessed space below is "
2544+
<< (Valid ? "" : "not ") << "equal!\n");
2545+
LLVM_DEBUG(LoadMA->dump(); StoreMA->dump());
2546+
2547+
if (Valid) {
2548+
// Then check if they actually access the same memory. The computation runs
// on copies; LoadAccs, StoreAccs and Domain are reused by the final check.
2549+
isl::map R = isl::manage(LoadAccs.copy())
2550+
.intersect_domain(isl::manage(Domain.copy()));
2551+
isl::map W = isl::manage(StoreAccs.copy())
2552+
.intersect_domain(isl::manage(Domain.copy()));
2553+
isl::set RS = R.range();
2554+
isl::set WS = W.range();
2555+
2556+
isl::set InterAccs =
2557+
isl::manage(RS.copy()).intersect(isl::manage(WS.copy()));
2558+
Valid = !InterAccs.is_empty();
2559+
LLVM_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "" : "not ")
2560+
<< "overlapping!\n");
2561+
}
25442562

2545-
// And check if the remaining for overlap with other memory accesses.
25462563
if (Valid) {
2564+
// Finally, check that there are no other instructions accessing this memory.
25472565
isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
25482566
AllAccsRel = AllAccsRel.intersect_domain(Domain);
25492567
isl::set AllAccs = AllAccsRel.range();
25502568
Valid = !hasIntersectingAccesses(AllAccs, LoadMA, StoreMA, Domain, MemAccs);
2569+
2570+
LLVM_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "not " : "")
2571+
<< "accessed by other instructions!\n");
25512572
}
25522573
return Valid;
25532574
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s
2+
; Verify that the following case is not detected as a reduction.
3+
;
4+
; void f(int *A,int *sum) {
5+
; for (int i = 0; i < 1024; i++)
6+
; sum[0] = sum[1] + A[i];
7+
; }
8+
;
9+
; Verify that we don't detect the reduction on sum
10+
;
11+
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
12+
; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_sum[1] };
13+
; CHECK-NEXT:ReadAccess := [Reduction Type: NONE] [Scalar: 0]
14+
; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[i0] };
15+
; CHECK-NEXT:MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
16+
; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_sum[0] };
17+
;
18+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
19+
20+
; Each loop iteration loads sum[1] and A[i], adds them, and stores the result
; to sum[0]; the loop exits once the incremented index equals 1024.
define dso_local void @f(ptr nocapture noundef readonly %A, ptr nocapture noundef %sum) local_unnamed_addr #0 {
21+
entry:
22+
br label %for.body
23+
24+
for.cond.cleanup: ; preds = %for.body
25+
ret void
26+
27+
for.body: ; preds = %entry, %for.body
28+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
29+
%arrayidx = getelementptr inbounds i32, ptr %sum, i64 1
30+
%0 = load i32, ptr %arrayidx
31+
%arrayidx1 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
32+
%1 = load i32, ptr %arrayidx1
33+
%add = add nsw i32 %1, %0
34+
store i32 %add, ptr %sum
35+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
36+
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
37+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
38+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s
2+
; Verify that the following case is not detected as a reduction.
3+
;
4+
; void f(int *A, int *sum, int i1, int i2) {
5+
; for (int i = 0; i < 1024; i++)
6+
; sum[i2] = sum[i1] + A[i];
7+
; }
8+
;
9+
; Verify that we don't detect the reduction on sum
10+
;
11+
; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
12+
; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_sum[i1] };
13+
; CHECK-NEXT:ReadAccess := [Reduction Type: NONE] [Scalar: 0]
14+
; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[i0] };
15+
; CHECK-NEXT:MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
16+
; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_sum[i2] };
17+
;
18+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
19+
20+
; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
21+
; The addresses of sum[i1] and sum[i2] are computed once in entry.split. Each
; loop iteration then loads sum[i1] and A[i], adds them, and stores the result
; to sum[i2]; the loop exits once the incremented index equals 1024.
define dso_local void @f(ptr nocapture noundef readonly %A, ptr nocapture noundef %sum, i32 noundef %i1, i32 noundef %i2) local_unnamed_addr #0 {
22+
entry:
23+
br label %entry.split
24+
25+
entry.split: ; preds = %entry
26+
%idxprom = sext i32 %i1 to i64
27+
%arrayidx = getelementptr inbounds i32, ptr %sum, i64 %idxprom
28+
%idxprom3 = sext i32 %i2 to i64
29+
%arrayidx4 = getelementptr inbounds i32, ptr %sum, i64 %idxprom3
30+
br label %for.body
31+
32+
for.cond.cleanup: ; preds = %for.body
33+
ret void
34+
35+
for.body: ; preds = %entry.split, %for.body
36+
%indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
37+
%0 = load i32, ptr %arrayidx, align 4
38+
%arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
39+
%1 = load i32, ptr %arrayidx2, align 4
40+
%add = add nsw i32 %1, %0
41+
store i32 %add, ptr %arrayidx4, align 4
42+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
43+
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
44+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
45+
}

0 commit comments

Comments
 (0)