Skip to content

Commit 40a72f8

Browse files
committed
[VPlan] Support extracting any lane of a uniform value.
If the value we are extracting a lane from is uniform, only the scalar for the first lane is set, so return the lane 0 value for any requested lane. This fixes a crash when trying to extract the last lane for a first-order recurrence resume value. Fixes #95520.
1 parent e7acb37 commit 40a72f8

File tree

7 files changed

+127
-33
lines changed

7 files changed

+127
-33
lines changed

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -231,6 +231,11 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
231231
return Data
232232
.PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
233233
}
234+
if (!Instance.Lane.isFirstLane() &&
235+
vputils::isUniformAfterVectorization(Def) &&
236+
hasScalarValue(Def, {Instance.Part, VPLane::getFirstLane()})) {
237+
return Data.PerPartScalars[Def][Instance.Part][0];
238+
}
234239

235240
assert(hasVectorValue(Def, Instance.Part));
236241
auto *VecPart = Data.PerPartOutput[Def][Instance.Part];

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -577,7 +577,8 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
577577
// When loop is unrolled without vectorizing, retrieve UF - Offset.
578578
Res = State.get(getOperand(0), State.UF - Offset);
579579
}
580-
Res->setName(Name);
580+
if (isa<ExtractElementInst>(Res))
581+
Res->setName(Name);
581582
return Res;
582583
}
583584
case VPInstruction::LogicalAnd: {

llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll

Lines changed: 2 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -100,10 +100,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
100100
; DEFAULT: middle.block:
101101
; DEFAULT-NEXT: [[BIN_RDX:%.*]] = or <vscale x 4 x i32> [[TMP58]], [[TMP57]]
102102
; DEFAULT-NEXT: [[TMP60:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[BIN_RDX]])
103-
; DEFAULT-NEXT: [[TMP61:%.*]] = call i32 @llvm.vscale.i32()
104-
; DEFAULT-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], 4
105-
; DEFAULT-NEXT: [[TMP63:%.*]] = sub i32 [[TMP62]], 1
106-
; DEFAULT-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[BROADCAST_SPLAT5]], i32 [[TMP63]]
107103
; DEFAULT-NEXT: [[TMP64:%.*]] = call i32 @llvm.vscale.i32()
108104
; DEFAULT-NEXT: [[TMP65:%.*]] = mul i32 [[TMP64]], 4
109105
; DEFAULT-NEXT: [[TMP66:%.*]] = sub i32 [[TMP65]], 1
@@ -112,7 +108,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
112108
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
113109
; DEFAULT: scalar.ph:
114110
; DEFAULT-NEXT: [[SCALAR_RECUR_INIT14:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT13]], [[MIDDLE_BLOCK]] ]
115-
; DEFAULT-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
111+
; DEFAULT-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
116112
; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
117113
; DEFAULT-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP60]], [[MIDDLE_BLOCK]] ]
118114
; DEFAULT-NEXT: br label [[LOOP:%.*]]
@@ -225,18 +221,14 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
225221
; PRED-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
226222
; PRED: middle.block:
227223
; PRED-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[TMP42]])
228-
; PRED-NEXT: [[TMP46:%.*]] = call i32 @llvm.vscale.i32()
229-
; PRED-NEXT: [[TMP47:%.*]] = mul i32 [[TMP46]], 4
230-
; PRED-NEXT: [[TMP48:%.*]] = sub i32 [[TMP47]], 1
231-
; PRED-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i32> [[BROADCAST_SPLAT]], i32 [[TMP48]]
232224
; PRED-NEXT: [[TMP49:%.*]] = call i32 @llvm.vscale.i32()
233225
; PRED-NEXT: [[TMP50:%.*]] = mul i32 [[TMP49]], 4
234226
; PRED-NEXT: [[TMP51:%.*]] = sub i32 [[TMP50]], 1
235227
; PRED-NEXT: [[VECTOR_RECUR_EXTRACT8:%.*]] = extractelement <vscale x 4 x i32> [[TMP22]], i32 [[TMP51]]
236228
; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
237229
; PRED: scalar.ph:
238230
; PRED-NEXT: [[SCALAR_RECUR_INIT9:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT8]], [[MIDDLE_BLOCK]] ]
239-
; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
231+
; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
240232
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
241233
; PRED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ]
242234
; PRED-NEXT: br label [[LOOP:%.*]]

llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -657,13 +657,17 @@ define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) {
657657
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
658658
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
659659
; CHECK: middle.block:
660-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 2
661-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 3
662-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 2
663-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 3
664660
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2
665661
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3
666662
; CHECK-NEXT: br i1 true, label %End, label %scalar.ph
663+
; CHECK: scalar.ph:
664+
; CHECK-NEXT: phi double [ 0.000000e+00, %Entry ], [ [[VECTOR_RECUR_EXTRACT9]], %middle.block ]
665+
; CHECK-NEXT: phi double [ 0.000000e+00, %Entry ], [ [[TMP3]], %middle.block ]
666+
; CHECK-NEXT: phi double [ 0.000000e+00, %Entry ], [ [[TMP0]], %middle.block ]
667+
; CHECK: End:
668+
; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[TMP0]], %middle.block ]
669+
; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[TMP3]], %middle.block ]
670+
; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI10]], %middle.block ]
667671
;
668672
Entry:
669673
br label %Loop

llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -385,12 +385,10 @@ define void @instruction_with_2_FOR_operands(ptr noalias %A, ptr noalias %B, ptr
385385
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
386386
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
387387
; CHECK: middle.block:
388-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT3]], i32 3
389-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i32 3
390388
; CHECK-NEXT: br i1 false, label [[BB74:%.*]], label [[SCALAR_PH]]
391389
; CHECK: scalar.ph:
392-
; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 1.000000e+00, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT4]], [[MIDDLE_BLOCK]] ]
393-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[BB]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
390+
; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 1.000000e+00, [[BB:%.*]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
391+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[BB]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
394392
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
395393
; CHECK-NEXT: br label [[BB13:%.*]]
396394
; CHECK: bb13:
@@ -463,12 +461,10 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses(ptr noalias
463461
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
464462
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
465463
; CHECK: middle.block:
466-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT3]], i32 3
467-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i32 3
468464
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
469465
; CHECK: scalar.ph:
470-
; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT4]], [[MIDDLE_BLOCK]] ]
471-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[BB]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
466+
; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[BB:%.*]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
467+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[BB]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
472468
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
473469
; CHECK-NEXT: br label [[LOOP:%.*]]
474470
; CHECK: loop:
@@ -556,12 +552,10 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses_chain(ptr n
556552
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
557553
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
558554
; CHECK: middle.block:
559-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT3]], i32 3
560-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i32 3
561555
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
562556
; CHECK: scalar.ph:
563-
; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT4]], [[MIDDLE_BLOCK]] ]
564-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[BB]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
557+
; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[BB:%.*]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
558+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[BB]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
565559
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
566560
; CHECK-NEXT: br label [[LOOP:%.*]]
567561
; CHECK: loop:

llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -121,12 +121,10 @@ define void @test_pr54223_sink_after_insertion_order(ptr noalias %a, ptr noalias
121121
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
122122
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
123123
; CHECK: middle.block:
124-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i32 3
125-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT3]], i32 3
126124
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
127125
; CHECK: scalar.ph:
128-
; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT4]], [[MIDDLE_BLOCK]] ]
129-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
126+
; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
127+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
130128
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
131129
; CHECK-NEXT: br label [[LOOP:%.*]]
132130
; CHECK: loop:
@@ -246,10 +244,9 @@ define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias %
246244
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
247245
; CHECK: middle.block:
248246
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
249-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[BROADCAST_SPLAT]], i32 3
250247
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
251248
; CHECK: scalar.ph:
252-
; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
249+
; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
253250
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
254251
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
255252
; CHECK-NEXT: br label [[LOOP:%.*]]

llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll

Lines changed: 101 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3750,5 +3750,106 @@ for.end:
37503750
ret void
37513751
}
37523752

3753+
; Test case for https://github.com/llvm/llvm-project/issues/95520.
3754+
define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) {
3755+
; UNROLL-NO-IC-LABEL: @recurence_uniform_load(
3756+
; UNROLL-NO-IC-NEXT: entry:
3757+
; UNROLL-NO-IC-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3758+
; UNROLL-NO-IC: vector.ph:
3759+
; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
3760+
; UNROLL-NO-IC: vector.body:
3761+
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3762+
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
3763+
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
3764+
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
3765+
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
3766+
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
3767+
; UNROLL-NO-IC-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
3768+
; UNROLL-NO-IC: middle.block:
3769+
; UNROLL-NO-IC-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
3770+
; UNROLL-NO-IC: scalar.ph:
3771+
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ]
3772+
; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
3773+
; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]]
3774+
; UNROLL-NO-IC: loop:
3775+
; UNROLL-NO-IC-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
3776+
; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ]
3777+
; UNROLL-NO-IC-NEXT: [[ADD]] = add i64 [[PHI]], 1
3778+
; UNROLL-NO-IC-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4
3779+
; UNROLL-NO-IC-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1
3780+
; UNROLL-NO-IC-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP38:![0-9]+]]
3781+
; UNROLL-NO-IC: exit:
3782+
; UNROLL-NO-IC-NEXT: ret i32 0
3783+
;
3784+
; UNROLL-NO-VF-LABEL: @recurence_uniform_load(
3785+
; UNROLL-NO-VF-NEXT: entry:
3786+
; UNROLL-NO-VF-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3787+
; UNROLL-NO-VF: vector.ph:
3788+
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
3789+
; UNROLL-NO-VF: vector.body:
3790+
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3791+
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
3792+
; UNROLL-NO-VF-NEXT: [[TMP0]] = load i32, ptr [[SRC:%.*]], align 4
3793+
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
3794+
; UNROLL-NO-VF-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
3795+
; UNROLL-NO-VF: middle.block:
3796+
; UNROLL-NO-VF-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
3797+
; UNROLL-NO-VF: scalar.ph:
3798+
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ]
3799+
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
3800+
; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]]
3801+
; UNROLL-NO-VF: loop:
3802+
; UNROLL-NO-VF-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
3803+
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ]
3804+
; UNROLL-NO-VF-NEXT: [[ADD]] = add i64 [[PHI]], 1
3805+
; UNROLL-NO-VF-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4
3806+
; UNROLL-NO-VF-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1
3807+
; UNROLL-NO-VF-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP38:![0-9]+]]
3808+
; UNROLL-NO-VF: exit:
3809+
; UNROLL-NO-VF-NEXT: ret i32 0
3810+
;
3811+
; SINK-AFTER-LABEL: @recurence_uniform_load(
3812+
; SINK-AFTER-NEXT: entry:
3813+
; SINK-AFTER-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3814+
; SINK-AFTER: vector.ph:
3815+
; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
3816+
; SINK-AFTER: vector.body:
3817+
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3818+
; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
3819+
; SINK-AFTER-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
3820+
; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
3821+
; SINK-AFTER-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
3822+
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
3823+
; SINK-AFTER-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
3824+
; SINK-AFTER: middle.block:
3825+
; SINK-AFTER-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
3826+
; SINK-AFTER: scalar.ph:
3827+
; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ]
3828+
; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
3829+
; SINK-AFTER-NEXT: br label [[LOOP:%.*]]
3830+
; SINK-AFTER: loop:
3831+
; SINK-AFTER-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
3832+
; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD:%.*]], [[LOOP]] ]
3833+
; SINK-AFTER-NEXT: [[ADD]] = add i64 [[PHI]], 1
3834+
; SINK-AFTER-NEXT: [[LOAD]] = load i32, ptr [[SRC]], align 4
3835+
; SINK-AFTER-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], 1
3836+
; SINK-AFTER-NEXT: br i1 [[ICMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP38:![0-9]+]]
3837+
; SINK-AFTER: exit:
3838+
; SINK-AFTER-NEXT: ret i32 0
3839+
;
3840+
entry:
3841+
br label %loop
3842+
3843+
loop:
3844+
%phi = phi i64 [ 0, %entry ], [ %add, %loop ]
3845+
%recur = phi i32 [ 0, %entry ], [ %load, %loop ]
3846+
%add = add i64 %phi, 1
3847+
%load = load i32, ptr %src, align 4
3848+
%icmp = icmp ult i64 %phi, 1
3849+
br i1 %icmp, label %loop, label %exit
3850+
3851+
exit:
3852+
ret i32 0
3853+
}
37533854

37543855
!2 = !{!"branch_weights", i32 1, i32 1}

0 commit comments

Comments
 (0)