Skip to content

Commit d65e06e

Browse files
committed
Block FMinimum and FMaximum
1 parent 868b8e9 commit d65e06e

File tree

2 files changed

+97
-19
lines changed

2 files changed

+97
-19
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1526,6 +1526,17 @@ class LoopVectorizationCostModel {
15261526
ForceTailFoldingStyle.getValue());
15271527
if (ForceTailFoldingStyle != TailFoldingStyle::DataWithEVL)
15281528
return;
1529+
1530+
// Block folding with EVL since vector-predication intrinsics have not
1531+
// support FMinimum and FMaximum reduction.
1532+
// FIXME: remove this check once llvm.vp.reduce.fminimum/fmaximum are
1533+
// supported
1534+
bool ContainsFMinimumOrFMaximumReduction =
1535+
any_of(Legal->getReductionVars(), [&](auto &Reduction) {
1536+
const RecurrenceDescriptor &RdxDesc = Reduction.second;
1537+
RecurKind Kind = RdxDesc.getRecurrenceKind();
1538+
return Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum;
1539+
});
15291540
// Override forced styles if needed.
15301541
// FIXME: use actual opcode/data type for analysis here.
15311542
// FIXME: Investigate opportunity for fixed vector factor.
@@ -1534,7 +1545,8 @@ class LoopVectorizationCostModel {
15341545
TTI.hasActiveVectorLength(0, nullptr, Align()) &&
15351546
!EnableVPlanNativePath &&
15361547
// FIXME: implement support for max safe dependency distance.
1537-
Legal->isSafeForAnyVectorWidth();
1548+
Legal->isSafeForAnyVectorWidth() &&
1549+
!ContainsFMinimumOrFMaximumReduction;
15381550
if (!EVLIsLegal) {
15391551
// If for some reason EVL mode is unsupported, fallback to
15401552
// DataWithoutLaneMask to try to vectorize the loop with folded tail

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll

Lines changed: 84 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,18 +1214,51 @@ for.end:
12141214
define float @fminimum(ptr %a, i64 %n, float %start) {
12151215
; IF-EVL-LABEL: @fminimum(
12161216
; IF-EVL-NEXT: entry:
1217+
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1218+
; IF-EVL: vector.ph:
1219+
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], 7
1220+
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
1221+
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
1222+
; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
1223+
; IF-EVL-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[START:%.*]], i64 0
1224+
; IF-EVL-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <8 x float> [[MINMAX_IDENT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
1225+
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
1226+
; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
1227+
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
1228+
; IF-EVL: vector.body:
1229+
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1230+
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
1231+
; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1232+
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
1233+
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1234+
; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
1235+
; IF-EVL-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]]
1236+
; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
1237+
; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
1238+
; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison)
1239+
; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_MASKED_LOAD]])
1240+
; IF-EVL-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP4]], <8 x float> [[VEC_PHI]]
1241+
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
1242+
; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1243+
; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
1244+
; IF-EVL: middle.block:
1245+
; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP5]])
1246+
; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1247+
; IF-EVL: scalar.ph:
1248+
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1249+
; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
12171250
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
12181251
; IF-EVL: for.body:
1219-
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1220-
; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MIN:%.*]], [[FOR_BODY]] ]
1221-
; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]]
1222-
; IF-EVL-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1223-
; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0]])
1252+
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1253+
; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN:%.*]], [[FOR_BODY]] ]
1254+
; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
1255+
; IF-EVL-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1256+
; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP8]])
12241257
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1225-
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
1226-
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]]
1258+
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1259+
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
12271260
; IF-EVL: for.end:
1228-
; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ]
1261+
; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
12291262
; IF-EVL-NEXT: ret float [[MIN_LCSSA]]
12301263
;
12311264
; NO-VP-LABEL: @fminimum(
@@ -1264,18 +1297,51 @@ for.end:
12641297
define float @fmaximum(ptr %a, i64 %n, float %start) {
12651298
; IF-EVL-LABEL: @fmaximum(
12661299
; IF-EVL-NEXT: entry:
1300+
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1301+
; IF-EVL: vector.ph:
1302+
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], 7
1303+
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
1304+
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
1305+
; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
1306+
; IF-EVL-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[START:%.*]], i64 0
1307+
; IF-EVL-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <8 x float> [[MINMAX_IDENT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
1308+
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
1309+
; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
1310+
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
1311+
; IF-EVL: vector.body:
1312+
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1313+
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
1314+
; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1315+
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
1316+
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1317+
; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
1318+
; IF-EVL-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]]
1319+
; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
1320+
; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
1321+
; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison)
1322+
; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_MASKED_LOAD]])
1323+
; IF-EVL-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP4]], <8 x float> [[VEC_PHI]]
1324+
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
1325+
; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1326+
; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
1327+
; IF-EVL: middle.block:
1328+
; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP5]])
1329+
; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1330+
; IF-EVL: scalar.ph:
1331+
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1332+
; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
12671333
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
12681334
; IF-EVL: for.body:
1269-
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1270-
; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MAX:%.*]], [[FOR_BODY]] ]
1271-
; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]]
1272-
; IF-EVL-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1273-
; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0]])
1335+
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1336+
; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MAX:%.*]], [[FOR_BODY]] ]
1337+
; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
1338+
; IF-EVL-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1339+
; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP8]])
12741340
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1275-
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
1276-
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]]
1341+
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1342+
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
12771343
; IF-EVL: for.end:
1278-
; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ]
1344+
; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
12791345
; IF-EVL-NEXT: ret float [[MAX_LCSSA]]
12801346
;
12811347
; NO-VP-LABEL: @fmaximum(
@@ -1360,7 +1426,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
13601426
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP24]], [[EVL_BASED_IV]]
13611427
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
13621428
; IF-EVL-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1363-
; IF-EVL-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
1429+
; IF-EVL-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
13641430
; IF-EVL: middle.block:
13651431
; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
13661432
; IF-EVL: scalar.ph:
@@ -1377,7 +1443,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
13771443
; IF-EVL-NEXT: [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP26]], float [[TMP27]], float [[RDX]])
13781444
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
13791445
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1380-
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
1446+
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
13811447
; IF-EVL: for.end:
13821448
; IF-EVL-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
13831449
; IF-EVL-NEXT: ret float [[MULADD_LCSSA]]

0 commit comments

Comments
 (0)