Skip to content

Commit 51f4829

Browse files
committed
[MBP] Disable aggressive loop rotate in plain mode
Patch https://reviews.llvm.org/D43256 introduced a more aggressive loop layout optimization which depends on profile information. If profile information is not available, the statically estimated profile information (generated by BranchProbabilityInfo.cpp) is used. If the user program doesn't behave as BranchProbabilityInfo.cpp expects, the layout may be worse. To be conservative, this patch restores the original layout algorithm in plain mode. Users can still try the aggressive layout optimization with -force-precise-rotation-cost=true. Differential Revision: https://reviews.llvm.org/D65673 llvm-svn: 369664
1 parent 95cf66d commit 51f4829

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+3254
-3536
lines changed

llvm/lib/CodeGen/MachineBlockPlacement.cpp

Lines changed: 80 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -462,17 +462,20 @@ class MachineBlockPlacement : public MachineFunctionPass {
462462
const MachineBasicBlock *ExitBB,
463463
const BlockFilterSet &LoopBlockSet);
464464
MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
465-
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
465+
const MachineLoop &L,
466+
const BlockFilterSet &LoopBlockSet,
467+
bool HasStaticProfileOnly = false);
466468
MachineBasicBlock *findBestLoopTop(
467469
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
470+
MachineBasicBlock *findBestLoopTopNoProfile(
471+
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
468472
MachineBasicBlock *findBestLoopExit(
469-
const MachineLoop &L, const BlockFilterSet &LoopBlockSet,
470-
BlockFrequency &ExitFreq);
473+
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
471474
BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
472475
void buildLoopChains(const MachineLoop &L);
473476
void rotateLoop(
474477
BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
475-
BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
478+
const BlockFilterSet &LoopBlockSet);
476479
void rotateLoopWithProfile(
477480
BlockChain &LoopChain, const MachineLoop &L,
478481
const BlockFilterSet &LoopBlockSet);
@@ -1947,11 +1950,14 @@ MachineBlockPlacement::FallThroughGains(
19471950
/// At the same time, move it before old top increases the taken branch
19481951
/// to loop exit block, so the reduced taken branch will be compared with
19491952
/// the increased taken branch to the loop exit block.
1953+
///
1954+
/// This pattern is enabled only when HasStaticProfileOnly is false.
19501955
MachineBasicBlock *
19511956
MachineBlockPlacement::findBestLoopTopHelper(
19521957
MachineBasicBlock *OldTop,
19531958
const MachineLoop &L,
1954-
const BlockFilterSet &LoopBlockSet) {
1959+
const BlockFilterSet &LoopBlockSet,
1960+
bool HasStaticProfileOnly) {
19551961
// Check that the header hasn't been fused with a preheader block due to
19561962
// crazy branches. If it has, we need to start with the header at the top to
19571963
// prevent pulling the preheader into the loop body.
@@ -1975,22 +1981,38 @@ MachineBlockPlacement::findBestLoopTopHelper(
19751981
if (Pred->succ_size() > 2)
19761982
continue;
19771983

1978-
MachineBasicBlock *OtherBB = nullptr;
1979-
if (Pred->succ_size() == 2) {
1980-
OtherBB = *Pred->succ_begin();
1981-
if (OtherBB == OldTop)
1982-
OtherBB = *Pred->succ_rbegin();
1983-
}
1984-
19851984
if (!canMoveBottomBlockToTop(Pred, OldTop))
19861985
continue;
19871986

1988-
BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
1989-
LoopBlockSet);
1990-
if ((Gains > 0) && (Gains > BestGains ||
1991-
((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
1992-
BestPred = Pred;
1993-
BestGains = Gains;
1987+
if (HasStaticProfileOnly) {
1988+
// In plain mode we consider pattern 1 only.
1989+
if (Pred->succ_size() > 1)
1990+
continue;
1991+
1992+
BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
1993+
if (!BestPred || PredFreq > BestGains ||
1994+
(!(PredFreq < BestGains) &&
1995+
Pred->isLayoutSuccessor(OldTop))) {
1996+
BestPred = Pred;
1997+
BestGains = PredFreq;
1998+
}
1999+
} else {
2000+
// With profile information we also consider pattern 2.
2001+
MachineBasicBlock *OtherBB = nullptr;
2002+
if (Pred->succ_size() == 2) {
2003+
OtherBB = *Pred->succ_begin();
2004+
if (OtherBB == OldTop)
2005+
OtherBB = *Pred->succ_rbegin();
2006+
}
2007+
2008+
// And use a more sophisticated cost model.
2009+
BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
2010+
LoopBlockSet);
2011+
if ((Gains > 0) && (Gains > BestGains ||
2012+
((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
2013+
BestPred = Pred;
2014+
BestGains = Gains;
2015+
}
19942016
}
19952017
}
19962018

@@ -2010,7 +2032,7 @@ MachineBlockPlacement::findBestLoopTopHelper(
20102032
return BestPred;
20112033
}
20122034

2013-
/// Find the best loop top block for layout.
2035+
/// Find the best loop top block for layout in FDO mode.
20142036
///
20152037
/// This function iteratively calls findBestLoopTopHelper, until no new better
20162038
/// BB can be found.
@@ -2038,15 +2060,42 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
20382060
return NewTop;
20392061
}
20402062

2063+
/// Find the best loop top block for layout in plain mode. It is less aggressive
2064+
/// than findBestLoopTop.
2065+
///
2066+
/// Look for a block which is strictly better than the loop header for laying
2067+
/// out at the top of the loop. This looks for one and only one pattern:
2068+
/// a latch block with no conditional exit. This block will cause a conditional
2069+
/// jump around it or will be the bottom of the loop if we lay it out in place,
2070+
/// but if it doesn't end up at the bottom of the loop for any reason,
2071+
/// rotation alone won't fix it. Because such a block will always result in an
2072+
/// unconditional jump (for the backedge) rotating it in front of the loop
2073+
/// header is always profitable.
2074+
MachineBasicBlock *
2075+
MachineBlockPlacement::findBestLoopTopNoProfile(
2076+
const MachineLoop &L,
2077+
const BlockFilterSet &LoopBlockSet) {
2078+
// Placing the latch block before the header may introduce an extra branch
2079+
// that skips this block the first time the loop is executed, which we want
2080+
// to avoid when optimising for size.
2081+
// FIXME: in theory there is a case that does not introduce a new branch,
2082+
// i.e. when the layout predecessor does not fallthrough to the loop header.
2083+
// In practice this never happens though: there always seems to be a preheader
2084+
// that can fallthrough and that is also placed before the header.
2085+
if (F->getFunction().hasOptSize())
2086+
return L.getHeader();
2087+
2088+
return findBestLoopTopHelper(L.getHeader(), L, LoopBlockSet, true);
2089+
}
2090+
20412091
/// Find the best loop exiting block for layout.
20422092
///
20432093
/// This routine implements the logic to analyze the loop looking for the best
20442094
/// block to layout at the top of the loop. Typically this is done to maximize
20452095
/// fallthrough opportunities.
20462096
MachineBasicBlock *
20472097
MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
2048-
const BlockFilterSet &LoopBlockSet,
2049-
BlockFrequency &ExitFreq) {
2098+
const BlockFilterSet &LoopBlockSet) {
20502099
// We don't want to layout the loop linearly in all cases. If the loop header
20512100
// is just a normal basic block in the loop, we want to look for what block
20522101
// within the loop is the best one to layout at the top. However, if the loop
@@ -2157,7 +2206,6 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
21572206

21582207
LLVM_DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB)
21592208
<< "\n");
2160-
ExitFreq = BestExitEdgeFreq;
21612209
return ExitingBB;
21622210
}
21632211

@@ -2202,7 +2250,6 @@ MachineBlockPlacement::hasViableTopFallthrough(
22022250
/// of its bottom already, don't rotate it.
22032251
void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
22042252
const MachineBasicBlock *ExitingBB,
2205-
BlockFrequency ExitFreq,
22062253
const BlockFilterSet &LoopBlockSet) {
22072254
if (!ExitingBB)
22082255
return;
@@ -2226,12 +2273,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
22262273
(!SuccChain || Succ == *SuccChain->begin()))
22272274
return;
22282275
}
2229-
2230-
// Rotate will destroy the top fallthrough, we need to ensure the new exit
2231-
// frequency is larger than top fallthrough.
2232-
BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
2233-
if (FallThrough2Top >= ExitFreq)
2234-
return;
22352276
}
22362277

22372278
BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
@@ -2483,7 +2524,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
24832524
// loop. This will default to the header, but may end up as one of the
24842525
// predecessors to the header if there is one which will result in strictly
24852526
// fewer branches in the loop body.
2486-
MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
2527+
MachineBasicBlock *LoopTop =
2528+
(RotateLoopWithProfile || F->getFunction().hasProfileData()) ?
2529+
findBestLoopTop(L, LoopBlockSet) :
2530+
findBestLoopTopNoProfile(L, LoopBlockSet);
24872531

24882532
// If we selected just the header for the loop top, look for a potentially
24892533
// profitable exit block in the event that rotating the loop can eliminate
@@ -2492,9 +2536,8 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
24922536
// Loops are processed innermost to outermost, make sure we clear
24932537
// PreferredLoopExit before processing a new loop.
24942538
PreferredLoopExit = nullptr;
2495-
BlockFrequency ExitFreq;
24962539
if (!RotateLoopWithProfile && LoopTop == L.getHeader())
2497-
PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
2540+
PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
24982541

24992542
BlockChain &LoopChain = *BlockToChain[LoopTop];
25002543

@@ -2511,10 +2554,11 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
25112554

25122555
buildChain(LoopTop, LoopChain, &LoopBlockSet);
25132556

2514-
if (RotateLoopWithProfile)
2515-
rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2516-
else
2517-
rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
2557+
if (RotateLoopWithProfile) {
2558+
if (LoopTop == L.getHeader())
2559+
rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
2560+
} else
2561+
rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
25182562

25192563
LLVM_DEBUG({
25202564
// Crash at the end so we get all of the debugging output first.

llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ define i1 @test_conditional2(i32 %a, i32 %b, i32* %c) {
111111
; CHECK: mov w22, #2
112112
; CHECK-NOT: mov w22, #4
113113
; CHECK-NOT: cmn w22, #4
114-
; CHECK: [[LOOP2:LBB[0-9]+_[0-9]+]]: ; %for.cond
114+
; CHECK: b [[LOOP2:LBB[0-9]+_[0-9]+]]
115115
; CHECK-NOT: b.ne [[LOOP2]]
116116
; CHECK-NOT: b {{LBB[0-9]+_[0-9]+}}
117117
; CHECK: bl _foo

llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
; RUN: llc <%s -mtriple=aarch64-eabi -verify-machine-dom-info | FileCheck %s
22

33
; CHECK-LABEL: test:
4-
; CHECK-LABEL: %cond.false12.i
5-
; CHECK: b.gt
4+
; CHECK: LBB0_7:
5+
; CHECK: b.hi
6+
; CHECK-NEXT: b
67
; CHECK-NEXT: LBB0_8:
78
; CHECK-NEXT: mov x8, x9
89
; CHECK-NEXT: LBB0_9:

llvm/test/CodeGen/AMDGPU/collapse-endcf.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -230,11 +230,6 @@ bb.end: ; preds = %bb.then, %bb
230230
; Make sure scc liveness is updated if sor_b64 is removed
231231
; ALL-LABEL: {{^}}scc_liveness:
232232

233-
; GCN: %bb10
234-
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
235-
; GCN: s_andn2_b64
236-
; GCN-NEXT: s_cbranch_execz
237-
238233
; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
239234
; GCN: s_andn2_b64 exec, exec,
240235
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]
@@ -244,6 +239,10 @@ bb.end: ; preds = %bb.then, %bb
244239

245240
; GCN-NOT: s_or_b64 exec, exec
246241

242+
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
243+
; GCN: s_andn2_b64
244+
; GCN-NEXT: s_cbranch_execnz
245+
247246
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
248247
; GCN: buffer_store_dword
249248
; GCN: buffer_store_dword

llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,41 +20,38 @@ define amdgpu_ps void @main(i32, float) {
2020
; CHECK-NEXT: ; implicit-def: $sgpr8_sgpr9
2121
; CHECK-NEXT: ; implicit-def: $sgpr6_sgpr7
2222
; CHECK-NEXT: ; implicit-def: $sgpr2_sgpr3
23-
; CHECK-NEXT: s_branch BB0_3
24-
; CHECK-NEXT: BB0_1: ; %Flow1
25-
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
26-
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
27-
; CHECK-NEXT: s_mov_b64 s[8:9], 0
28-
; CHECK-NEXT: BB0_2: ; %Flow
29-
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
30-
; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7]
31-
; CHECK-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5]
32-
; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
33-
; CHECK-NEXT: s_and_b64 s[4:5], s[8:9], exec
34-
; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
35-
; CHECK-NEXT: s_mov_b64 s[4:5], s[10:11]
36-
; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
37-
; CHECK-NEXT: s_cbranch_execz BB0_6
38-
; CHECK-NEXT: BB0_3: ; %loop
23+
; CHECK-NEXT: BB0_1: ; %loop
3924
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
4025
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 32, v1
4126
; CHECK-NEXT: s_and_b64 vcc, exec, vcc
4227
; CHECK-NEXT: s_or_b64 s[6:7], s[6:7], exec
4328
; CHECK-NEXT: s_or_b64 s[8:9], s[8:9], exec
44-
; CHECK-NEXT: s_cbranch_vccz BB0_2
45-
; CHECK-NEXT: ; %bb.4: ; %endif1
46-
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
29+
; CHECK-NEXT: s_cbranch_vccz BB0_5
30+
; CHECK-NEXT: ; %bb.2: ; %endif1
31+
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
4732
; CHECK-NEXT: s_mov_b64 s[6:7], -1
4833
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[0:1]
4934
; CHECK-NEXT: s_xor_b64 s[8:9], exec, s[8:9]
50-
; CHECK-NEXT: ; mask branch BB0_1
51-
; CHECK-NEXT: s_cbranch_execz BB0_1
52-
; CHECK-NEXT: BB0_5: ; %endif2
53-
; CHECK-NEXT: ; in Loop: Header=BB0_3 Depth=1
35+
; CHECK-NEXT: ; mask branch BB0_4
36+
; CHECK-NEXT: BB0_3: ; %endif2
37+
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
5438
; CHECK-NEXT: v_add_u32_e32 v1, 1, v1
5539
; CHECK-NEXT: s_xor_b64 s[6:7], exec, -1
56-
; CHECK-NEXT: s_branch BB0_1
57-
; CHECK-NEXT: BB0_6: ; %Flow2
40+
; CHECK-NEXT: BB0_4: ; %Flow1
41+
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
42+
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
43+
; CHECK-NEXT: s_mov_b64 s[8:9], 0
44+
; CHECK-NEXT: BB0_5: ; %Flow
45+
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
46+
; CHECK-NEXT: s_and_b64 s[10:11], exec, s[6:7]
47+
; CHECK-NEXT: s_or_b64 s[10:11], s[10:11], s[4:5]
48+
; CHECK-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
49+
; CHECK-NEXT: s_and_b64 s[4:5], s[8:9], exec
50+
; CHECK-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
51+
; CHECK-NEXT: s_mov_b64 s[4:5], s[10:11]
52+
; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
53+
; CHECK-NEXT: s_cbranch_execnz BB0_1
54+
; CHECK-NEXT: ; %bb.6: ; %Flow2
5855
; CHECK-NEXT: s_or_b64 exec, exec, s[10:11]
5956
; CHECK-NEXT: v_mov_b32_e32 v1, 0
6057
; CHECK-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
@@ -65,7 +62,6 @@ define amdgpu_ps void @main(i32, float) {
6562
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
6663
; CHECK-NEXT: exp mrt0 v1, v1, v1, v1 done vm
6764
; CHECK-NEXT: s_endpgm
68-
; this is the divergent branch with the condition not marked as divergent
6965
start:
7066
%v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0)
7167
br label %loop

llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,27 @@
11
; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads=true -verify-machineinstrs < %s | FileCheck %s
22

3-
; CHECK-LABEL: %bb22
3+
; CHECK-LABEL: %bb11
44

5-
; Load from %arg has alias store in Loop
5+
; Load from %arg in a Loop body has alias store
66

77
; CHECK: flat_load_dword
88

9-
; #####################################################################
10-
11-
; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i]
12-
13-
; CHECK: s_load_dword
9+
; CHECK-LABEL: %bb20
10+
; CHECK: flat_store_dword
1411

1512
; #####################################################################
1613

17-
; CHECK-LABEL: %bb11
14+
; CHECK-LABEL: %bb22
1815

19-
; Load from %arg in a Loop body has alias store
16+
; Load from %arg has alias store in Loop
2017

2118
; CHECK: flat_load_dword
2219

23-
; CHECK-LABEL: %bb20
20+
; #####################################################################
2421

25-
; CHECK: flat_store_dword
22+
; Load from %arg1 has no-alias store in Loop - arg1[i+1] never alias arg1[i]
23+
24+
; CHECK: s_load_dword
2625

2726
define amdgpu_kernel void @cfg(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
2827
bb:

llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,20 @@
33

44
; SI-LABEL: {{^}}i1_copy_from_loop:
55
;
6-
; SI: ; %Flow
7-
; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
8-
; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], exec
9-
; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]
10-
116
; SI: ; %for.body
127
; SI: v_cmp_gt_u32_e64 [[CC_SREG:s\[[0-9]+:[0-9]+\]]], 4,
13-
; SI-DAG: s_andn2_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec
8+
; SI-DAG: s_andn2_b64 [[CC_ACCUM:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
149
; SI-DAG: s_and_b64 [[CC_MASK:s\[[0-9]+:[0-9]+\]]], [[CC_SREG]], exec
1510
; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], [[CC_MASK]]
1611

1712
; SI: ; %Flow1
1813
; SI: s_or_b64 [[CC_ACCUM]], [[CC_ACCUM]], exec
1914

15+
; SI: ; %Flow
16+
; SI-DAG: s_andn2_b64 [[LCSSA_ACCUM:s\[[0-9]+:[0-9]+\]]], [[LCSSA_ACCUM]], exec
17+
; SI-DAG: s_and_b64 [[CC_MASK2:s\[[0-9]+:[0-9]+\]]], [[CC_ACCUM]], exec
18+
; SI: s_or_b64 [[LCSSA_ACCUM]], [[LCSSA_ACCUM]], [[CC_MASK2]]
19+
2020
; SI: ; %for.end
2121
; SI: s_and_saveexec_b64 {{s\[[0-9]+:[0-9]+\]}}, [[LCSSA_ACCUM]]
2222

0 commit comments

Comments
 (0)