Skip to content

Commit 6bb4055

Browse files
committed
[LoopVectorize] Add support for invariant stores of ordered reductions
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D126772
1 parent 929d5de commit 6bb4055

File tree

2 files changed

+29
-6
lines changed

2 files changed

+29
-6
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1017,11 +1017,10 @@ bool LoopVectorizationLegality::canVectorizeFPMath(
10171017

10181018
// We can now only vectorize if all reductions with Exact FP math also
10191019
// have the isOrdered flag set, which indicates that we can move the
1020-
// reduction operations in-loop, and do not have intermediate store.
1020+
// reduction operations in-loop.
10211021
return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
10221022
const RecurrenceDescriptor &RdxDesc = Reduction.second;
1023-
return !RdxDesc.hasExactFPMath() ||
1024-
(RdxDesc.isOrdered() && !RdxDesc.IntermediateStore);
1023+
return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
10251024
}));
10261025
}
10271026

llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,10 +1361,34 @@ declare float @llvm.fmuladd.f32(float, float, float)
13611361
; Test case with invariant store where fadd is strict.
13621362
define void @reduction_store_to_invariant_address(float* %dst, float* readonly %src) {
13631363
; CHECK-ORDERED-LABEL: @reduction_store_to_invariant_address(
1364-
; CHECK-ORDERED-NOT: vector.body
1364+
; CHECK-ORDERED: entry
1365+
; CHECK-ORDERED: %[[DEST_PTR:.*]] = getelementptr inbounds float, float* %dst, i64 42
1366+
; CHECK-ORDERED: vector.body
1367+
; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
1368+
; CHECK-ORDERED: %[[LOAD_VEC:.*]] = load <8 x float>, <8 x float>*
1369+
; CHECK-ORDERED: %[[RDX:.*]] = call float @llvm.vector.reduce.fadd.v8f32(float %[[VEC_PHI]], <8 x float> %[[LOAD_VEC]])
1370+
; CHECK-ORDERED: middle.block
1371+
; CHECK-ORDERED: store float %[[RDX]], float* %[[DEST_PTR]]
1372+
; CHECK-ORDERED: for.body
1373+
; CHECK-ORDERED: %[[LOAD:.*]] = load float, float*
1374+
; CHECK-ORDERED: %[[FADD:.*]] = fadd float %{{.*}}, %[[LOAD]]
1375+
; CHECK-ORDERED: store float %[[FADD]], float* %[[DEST_PTR]]
13651376

13661377
; CHECK-UNORDERED-LABEL: @reduction_store_to_invariant_address(
1367-
; CHECK-UNORDERED-NOT: vector.body
1378+
; CHECK-UNORDERED: entry
1379+
; CHECK-UNORDERED: %[[DEST_PTR:.*]] = getelementptr inbounds float, float* %dst, i64 42
1380+
; CHECK-UNORDERED: vector.body
1381+
; CHECK-UNORDERED: %[[VEC_PHI:.*]] = phi <8 x float> [ <float 0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vector.ph ], [ %[[FADD_VEC:.*]], %vector.body ]
1382+
; CHECK-UNORDERED: %[[LOAD_VEC:.*]] = load <8 x float>, <8 x float>*
1383+
; CHECK-UNORDERED: %[[FADD_VEC]] = fadd <8 x float> %[[VEC_PHI]], %[[LOAD_VEC]]
1384+
; CHECK-UNORDERED-NOT: call float @llvm.vector.reduce.fadd
1385+
; CHECK-UNORDERED: middle.block
1386+
; CHECK-UNORDERED: %[[RDX:.*]] = call float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> %[[FADD_VEC]])
1387+
; CHECK-UNORDERED: store float %[[RDX]], float* %[[DEST_PTR]]
1388+
; CHECK-UNORDERED: for.body
1389+
; CHECK-UNORDERED: %[[LOAD:.*]] = load float, float*
1390+
; CHECK-UNORDERED: %[[FADD:.*]] = fadd float {{.*}}, %[[LOAD]]
1391+
; CHECK-UNORDERED: store float %[[FADD]], float* %[[DEST_PTR]]
13681392

13691393
; CHECK-NOT-VECTORIZED-LABEL: @reduction_store_to_invariant_address(
13701394
; CHECK-NOT-VECTORIZED-NOT: vector.body
@@ -1383,7 +1407,7 @@ for.body:
13831407
store float %add, float* %arrayidx, align 4
13841408
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
13851409
%exitcond = icmp eq i64 %indvars.iv.next, 1000
1386-
br i1 %exitcond, label %for.cond.cleanup, label %for.body
1410+
br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !0
13871411

13881412
for.cond.cleanup:
13891413
ret void

0 commit comments

Comments
 (0)