Skip to content

Commit 71fe035

Browse files
committed
Revert "Block FMinimum and FMaximum"
This reverts commit d65e06e.
1 parent 2280745 commit 71fe035

File tree

2 files changed

+19
-97
lines changed

2 files changed

+19
-97
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1526,17 +1526,6 @@ class LoopVectorizationCostModel {
15261526
ForceTailFoldingStyle.getValue());
15271527
if (ForceTailFoldingStyle != TailFoldingStyle::DataWithEVL)
15281528
return;
1529-
1530-
// Block folding with EVL since vector-predication intrinsics have not
1531-
// support FMinimum and FMaximum reduction.
1532-
// FIXME: remove this check once llvm.vp.reduce.fminimum/fmaximum are
1533-
// supported
1534-
bool ContainsFMinimumOrFMaximumReduction =
1535-
any_of(Legal->getReductionVars(), [&](auto &Reduction) {
1536-
const RecurrenceDescriptor &RdxDesc = Reduction.second;
1537-
RecurKind Kind = RdxDesc.getRecurrenceKind();
1538-
return Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum;
1539-
});
15401529
// Override forced styles if needed.
15411530
// FIXME: use actual opcode/data type for analysis here.
15421531
// FIXME: Investigate opportunity for fixed vector factor.
@@ -1545,8 +1534,7 @@ class LoopVectorizationCostModel {
15451534
TTI.hasActiveVectorLength(0, nullptr, Align()) &&
15461535
!EnableVPlanNativePath &&
15471536
// FIXME: implement support for max safe dependency distance.
1548-
Legal->isSafeForAnyVectorWidth() &&
1549-
!ContainsFMinimumOrFMaximumReduction;
1537+
Legal->isSafeForAnyVectorWidth();
15501538
if (!EVLIsLegal) {
15511539
// If for some reason EVL mode is unsupported, fallback to
15521540
// DataWithoutLaneMask to try to vectorize the loop with folded tail

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll

Lines changed: 18 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1214,51 +1214,18 @@ for.end:
12141214
define float @fminimum(ptr %a, i64 %n, float %start) {
12151215
; IF-EVL-LABEL: @fminimum(
12161216
; IF-EVL-NEXT: entry:
1217-
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1218-
; IF-EVL: vector.ph:
1219-
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], 7
1220-
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
1221-
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
1222-
; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
1223-
; IF-EVL-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[START:%.*]], i64 0
1224-
; IF-EVL-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <8 x float> [[MINMAX_IDENT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
1225-
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
1226-
; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
1227-
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
1228-
; IF-EVL: vector.body:
1229-
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1230-
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
1231-
; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1232-
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
1233-
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1234-
; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
1235-
; IF-EVL-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]]
1236-
; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
1237-
; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
1238-
; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison)
1239-
; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_MASKED_LOAD]])
1240-
; IF-EVL-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP4]], <8 x float> [[VEC_PHI]]
1241-
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
1242-
; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1243-
; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
1244-
; IF-EVL: middle.block:
1245-
; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP5]])
1246-
; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1247-
; IF-EVL: scalar.ph:
1248-
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1249-
; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
12501217
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
12511218
; IF-EVL: for.body:
1252-
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1253-
; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN:%.*]], [[FOR_BODY]] ]
1254-
; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
1255-
; IF-EVL-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1256-
; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP8]])
1219+
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1220+
; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MIN:%.*]], [[FOR_BODY]] ]
1221+
; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]]
1222+
; IF-EVL-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1223+
; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0]])
12571224
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1258-
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1259-
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
1225+
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
1226+
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]]
12601227
; IF-EVL: for.end:
1261-
; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1228+
; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ]
12621229
; IF-EVL-NEXT: ret float [[MIN_LCSSA]]
12631230
;
12641231
; NO-VP-LABEL: @fminimum(
@@ -1297,51 +1264,18 @@ for.end:
12971264
define float @fmaximum(ptr %a, i64 %n, float %start) {
12981265
; IF-EVL-LABEL: @fmaximum(
12991266
; IF-EVL-NEXT: entry:
1300-
; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1301-
; IF-EVL: vector.ph:
1302-
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], 7
1303-
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
1304-
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
1305-
; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
1306-
; IF-EVL-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[START:%.*]], i64 0
1307-
; IF-EVL-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <8 x float> [[MINMAX_IDENT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
1308-
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
1309-
; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
1310-
; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
1311-
; IF-EVL: vector.body:
1312-
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1313-
; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
1314-
; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1315-
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
1316-
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1317-
; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
1318-
; IF-EVL-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]]
1319-
; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
1320-
; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
1321-
; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison)
1322-
; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_MASKED_LOAD]])
1323-
; IF-EVL-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP4]], <8 x float> [[VEC_PHI]]
1324-
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
1325-
; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1326-
; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
1327-
; IF-EVL: middle.block:
1328-
; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP5]])
1329-
; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1330-
; IF-EVL: scalar.ph:
1331-
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1332-
; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
13331267
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
13341268
; IF-EVL: for.body:
1335-
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1336-
; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MAX:%.*]], [[FOR_BODY]] ]
1337-
; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
1338-
; IF-EVL-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1339-
; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP8]])
1269+
; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1270+
; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MAX:%.*]], [[FOR_BODY]] ]
1271+
; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]]
1272+
; IF-EVL-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1273+
; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0]])
13401274
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1341-
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1342-
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
1275+
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
1276+
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]]
13431277
; IF-EVL: for.end:
1344-
; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1278+
; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ]
13451279
; IF-EVL-NEXT: ret float [[MAX_LCSSA]]
13461280
;
13471281
; NO-VP-LABEL: @fmaximum(
@@ -1426,7 +1360,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
14261360
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP24]], [[EVL_BASED_IV]]
14271361
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
14281362
; IF-EVL-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1429-
; IF-EVL-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
1363+
; IF-EVL-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
14301364
; IF-EVL: middle.block:
14311365
; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
14321366
; IF-EVL: scalar.ph:
@@ -1443,7 +1377,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
14431377
; IF-EVL-NEXT: [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP26]], float [[TMP27]], float [[RDX]])
14441378
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
14451379
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1446-
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
1380+
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
14471381
; IF-EVL: for.end:
14481382
; IF-EVL-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
14491383
; IF-EVL-NEXT: ret float [[MULADD_LCSSA]]

0 commit comments

Comments
 (0)