Skip to content

[MachinePipeliner] Fix store-store dependences (#72508) #72575

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions llvm/lib/CodeGen/MachinePipeliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2225,7 +2225,7 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
}

/// Return true for an order or output dependence that is loop carried
/// potentially. A dependence is loop carried if the destination defines a valu
/// potentially. A dependence is loop carried if the destination defines a value
/// that may be used or defined by the source in a subsequent iteration.
bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
bool isSucc) {
Expand All @@ -2251,10 +2251,12 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
return true;

// Only chain dependences between a load and store can be loop carried.
if (!DI->mayStore() || !SI->mayLoad())
if (!DI->mayLoadOrStore() || !SI->mayLoadOrStore())
return false;

// The conservative assumption is that a dependence between memory operations
// may be loop carried. The following code checks when it can be proved that
// there is no loop carried dependence.
unsigned DeltaS, DeltaD;
if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
return true;
Expand Down
84 changes: 84 additions & 0 deletions llvm/test/CodeGen/PowerPC/sms-store-dependence.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
; RUN: -mcpu=pwr9 --ppc-enable-pipeliner 2>&1 | FileCheck %s

; Test that the pipeliner schedules the store instructions correctly. Since
; there is a dependence between the stores, they cannot be scheduled further than
; MII cycles/instructions apart. That is, the first store cannot occur multiple
; times before the second store in the schedule.
; Only %0 is marked noalias; %1 and %2 carry no such attribute, so the two
; stores in the loop body (one through each of those pointers) may alias.
define dso_local void @comp_method(ptr noalias nocapture noundef readonly %0, ptr nocapture noundef writeonly %1, ptr nocapture noundef writeonly %2, i32 noundef %3, i32 noundef %4, i32 noundef %5, i32 noundef %6, i64 %v1) local_unnamed_addr {
; CHECK-LABEL: comp_method:
; CHECK: # %bb.0:
; CHECK-NEXT: extsw 7, 8
; CHECK-NEXT: extsw 8, 9
; CHECK-NEXT: clrldi 9, 6, 32
; CHECK-NEXT: addi 6, 3, -1
; CHECK-NEXT: mtctr 9
; CHECK-NEXT: li 11, 0
; CHECK-NEXT: sradi 12, 11, 2
; CHECK-NEXT: add 5, 5, 8
; CHECK-NEXT: li 8, 2
; CHECK-NEXT: li 3, 8
; CHECK-NEXT: addi 11, 7, 0
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-NEXT: lbzu 9, 1(6)
; CHECK-NEXT: add 12, 12, 10
; CHECK-NEXT: extsb 9, 9
; CHECK-NEXT: stbx 8, 4, 9
; CHECK-NEXT: add 9, 9, 12
; CHECK-NEXT: bdz .LBB0_2
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1:
; CHECK-NEXT: lbzu 0, 1(6)
; CHECK-NEXT: sradi 12, 11, 2
; CHECK-NEXT: add 11, 11, 7
; CHECK-NEXT: add 12, 12, 10
; CHECK-NEXT: sldi 30, 9, 2
; CHECK-NEXT: add 9, 9, 30
; CHECK-NEXT: extsb 0, 0
; CHECK-NEXT: stbx 3, 5, 9
; CHECK-NEXT: add 9, 0, 12
; CHECK-NEXT: stbx 8, 4, 0
; CHECK-NEXT: bdnz .LBB0_1
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: sldi 4, 9, 2
; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; CHECK-NEXT: add 4, 9, 4
; CHECK-NEXT: stbx 3, 5, 4
; CHECK-NEXT: blr
; Entry block: assume %3 (the loop bound, see %25 below) is greater than 64 and
; even, then widen the scalar arguments that the loop body uses.
%8 = icmp sgt i32 %3, 64
tail call void @llvm.assume(i1 %8)
%9 = and i32 %3, 1
%10 = icmp eq i32 %9, 0
tail call void @llvm.assume(i1 %10)
%11 = sext i32 %5 to i64
%12 = sext i32 %6 to i64
%13 = zext nneg i32 %3 to i64
; %14 = %2 + %6: base address for the second store in the loop.
%14 = getelementptr i8, ptr %2, i64 %12
br label %16

; Loop exit.
15:
ret void

; Loop body: %17 is the induction variable, counting from 0 until it reaches %13.
16:
%17 = phi i64 [ 0, %7 ], [ %24, %16 ]
; Load one byte from %0[%17]; its sign-extended value (%20) is the index of the
; first store, which writes the constant 2 into %1.
%18 = getelementptr inbounds i8, ptr %0, i64 %17
%19 = load i8, ptr %18, align 1
%20 = sext i8 %19 to i64
%21 = getelementptr inbounds i8, ptr %1, i64 %20
store i8 2, ptr %21, align 1
; Second store: writes the constant 8 to %14 + 5 * (%20 + ((%17 * %11) >> 2) + %v1).
; Its address also depends on the loaded value %20.
%22 = mul nsw i64 %17, %11
%a1 = ashr i64 %22, 2
%a2 = add i64 %a1, %v1
%a3 = add i64 %20, %a2
%a4 = mul nsw i64 %a3, 5
%23 = getelementptr i8, ptr %14, i64 %a4
store i8 8, ptr %23, align 1
; Advance the induction variable and leave the loop once it equals %13.
%24 = add nuw nsw i64 %17, 1
%25 = icmp eq i64 %24, %13
br i1 %25, label %15, label %16
}

declare void @llvm.assume(i1 noundef) #1

attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }