Skip to content

Commit a19c7c4

Browse files
authored
[MachinePipeliner] Fix store-store dependences (llvm#72575)
The pipeliner needs to mark store-store order dependences as loop carried dependences. Otherwise, the stores may be scheduled further apart than the MII. The order dependence implies that the first instance of the dependent store is scheduled before the second instance of the source store instruction.
1 parent 3850131 commit a19c7c4

File tree

2 files changed

+89
-3
lines changed

2 files changed

+89
-3
lines changed

llvm/lib/CodeGen/MachinePipeliner.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2225,7 +2225,7 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
22252225
}
22262226

22272227
/// Return true for an order or output dependence that is loop carried
2228-
/// potentially. A dependence is loop carried if the destination defines a valu
2228+
/// potentially. A dependence is loop carried if the destination defines a value
22292229
/// that may be used or defined by the source in a subsequent iteration.
22302230
bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
22312231
bool isSucc) {
@@ -2251,10 +2251,12 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
22512251
SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
22522252
return true;
22532253

2254-
// Only chain dependences between a load and store can be loop carried.
2255-
if (!DI->mayStore() || !SI->mayLoad())
2254+
if (!DI->mayLoadOrStore() || !SI->mayLoadOrStore())
22562255
return false;
22572256

2257+
// The conservative assumption is that a dependence between memory operations
2258+
// may be loop carried. The following code checks when it can be proved that
2259+
// there is no loop carried dependence.
22582260
unsigned DeltaS, DeltaD;
22592261
if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
22602262
return true;
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
3+
; RUN: -mcpu=pwr9 --ppc-enable-pipeliner 2>&1 | FileCheck %s
4+
5+
; Test that the pipeliner schedules the store instructions correctly. Since
6+
; there is a dependence between the stores, they cannot be scheduled further than
7+
; MII cycles/instructions apart. That is, the first store cannot occur multiple
8+
; times before the second store in the schedule.
9+
define dso_local void @comp_method(ptr noalias nocapture noundef readonly %0, ptr nocapture noundef writeonly %1, ptr nocapture noundef writeonly %2, i32 noundef %3, i32 noundef %4, i32 noundef %5, i32 noundef %6, i64 %v1) local_unnamed_addr {
10+
; CHECK-LABEL: comp_method:
11+
; CHECK: # %bb.0:
12+
; CHECK-NEXT: extsw 7, 8
13+
; CHECK-NEXT: extsw 8, 9
14+
; CHECK-NEXT: clrldi 9, 6, 32
15+
; CHECK-NEXT: addi 6, 3, -1
16+
; CHECK-NEXT: mtctr 9
17+
; CHECK-NEXT: li 11, 0
18+
; CHECK-NEXT: sradi 12, 11, 2
19+
; CHECK-NEXT: add 5, 5, 8
20+
; CHECK-NEXT: li 8, 2
21+
; CHECK-NEXT: li 3, 8
22+
; CHECK-NEXT: addi 11, 7, 0
23+
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
24+
; CHECK-NEXT: lbzu 9, 1(6)
25+
; CHECK-NEXT: add 12, 12, 10
26+
; CHECK-NEXT: extsb 9, 9
27+
; CHECK-NEXT: stbx 8, 4, 9
28+
; CHECK-NEXT: add 9, 9, 12
29+
; CHECK-NEXT: bdz .LBB0_2
30+
; CHECK-NEXT: .p2align 4
31+
; CHECK-NEXT: .LBB0_1:
32+
; CHECK-NEXT: lbzu 0, 1(6)
33+
; CHECK-NEXT: sradi 12, 11, 2
34+
; CHECK-NEXT: add 11, 11, 7
35+
; CHECK-NEXT: add 12, 12, 10
36+
; CHECK-NEXT: sldi 30, 9, 2
37+
; CHECK-NEXT: add 9, 9, 30
38+
; CHECK-NEXT: extsb 0, 0
39+
; CHECK-NEXT: stbx 3, 5, 9
40+
; CHECK-NEXT: add 9, 0, 12
41+
; CHECK-NEXT: stbx 8, 4, 0
42+
; CHECK-NEXT: bdnz .LBB0_1
43+
; CHECK-NEXT: .LBB0_2:
44+
; CHECK-NEXT: sldi 4, 9, 2
45+
; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
46+
; CHECK-NEXT: add 4, 9, 4
47+
; CHECK-NEXT: stbx 3, 5, 4
48+
; CHECK-NEXT: blr
49+
%8 = icmp sgt i32 %3, 64
50+
tail call void @llvm.assume(i1 %8)
51+
%9 = and i32 %3, 1
52+
%10 = icmp eq i32 %9, 0
53+
tail call void @llvm.assume(i1 %10)
54+
%11 = sext i32 %5 to i64
55+
%12 = sext i32 %6 to i64
56+
%13 = zext nneg i32 %3 to i64
57+
%14 = getelementptr i8, ptr %2, i64 %12
58+
br label %16
59+
60+
15:
61+
ret void
62+
63+
16:
64+
%17 = phi i64 [ 0, %7 ], [ %24, %16 ]
65+
%18 = getelementptr inbounds i8, ptr %0, i64 %17
66+
%19 = load i8, ptr %18, align 1
67+
%20 = sext i8 %19 to i64
68+
%21 = getelementptr inbounds i8, ptr %1, i64 %20
69+
store i8 2, ptr %21, align 1
70+
%22 = mul nsw i64 %17, %11
71+
%a1 = ashr i64 %22, 2
72+
%a2 = add i64 %a1, %v1
73+
%a3 = add i64 %20, %a2
74+
%a4 = mul nsw i64 %a3, 5
75+
%23 = getelementptr i8, ptr %14, i64 %a4
76+
store i8 8, ptr %23, align 1
77+
%24 = add nuw nsw i64 %17, 1
78+
%25 = icmp eq i64 %24, %13
79+
br i1 %25, label %15, label %16
80+
}
81+
82+
declare void @llvm.assume(i1 noundef) #1
83+
84+
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }

0 commit comments

Comments
 (0)