@@ -1214,18 +1214,51 @@ for.end:
1214
1214
define float @fminimum (ptr %a , i64 %n , float %start ) {
1215
1215
; IF-EVL-LABEL: @fminimum(
1216
1216
; IF-EVL-NEXT: entry:
1217
+ ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1218
+ ; IF-EVL: vector.ph:
1219
+ ; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], 7
1220
+ ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
1221
+ ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
1222
+ ; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
1223
+ ; IF-EVL-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[START:%.*]], i64 0
1224
+ ; IF-EVL-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <8 x float> [[MINMAX_IDENT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
1225
+ ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
1226
+ ; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
1227
+ ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
1228
+ ; IF-EVL: vector.body:
1229
+ ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1230
+ ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
1231
+ ; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1232
+ ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
1233
+ ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1234
+ ; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
1235
+ ; IF-EVL-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]]
1236
+ ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
1237
+ ; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
1238
+ ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison)
1239
+ ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_MASKED_LOAD]])
1240
+ ; IF-EVL-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP4]], <8 x float> [[VEC_PHI]]
1241
+ ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
1242
+ ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1243
+ ; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
1244
+ ; IF-EVL: middle.block:
1245
+ ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP5]])
1246
+ ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1247
+ ; IF-EVL: scalar.ph:
1248
+ ; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1249
+ ; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1217
1250
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
1218
1251
; IF-EVL: for.body:
1219
- ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0 , [[ENTRY:%.* ]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1220
- ; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.* ]], [[ENTRY ]] ], [ [[MIN:%.*]], [[FOR_BODY]] ]
1221
- ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.* ]], i64 [[IV]]
1222
- ; IF-EVL-NEXT: [[TMP0 :%.*]] = load float, ptr [[ARRAYIDX]], align 4
1223
- ; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0 ]])
1252
+ ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]] , [[SCALAR_PH ]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1253
+ ; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX ]], [[SCALAR_PH ]] ], [ [[MIN:%.*]], [[FOR_BODY]] ]
1254
+ ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
1255
+ ; IF-EVL-NEXT: [[TMP8 :%.*]] = load float, ptr [[ARRAYIDX]], align 4
1256
+ ; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP8 ]])
1224
1257
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1225
- ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.* ]]
1226
- ; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.* ]], label [[FOR_BODY]], !llvm.loop [[LOOP4 ]]
1258
+ ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1259
+ ; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+ ]]
1227
1260
; IF-EVL: for.end:
1228
- ; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ]
1261
+ ; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1229
1262
; IF-EVL-NEXT: ret float [[MIN_LCSSA]]
1230
1263
;
1231
1264
; NO-VP-LABEL: @fminimum(
@@ -1264,18 +1297,51 @@ for.end:
1264
1297
define float @fmaximum (ptr %a , i64 %n , float %start ) {
1265
1298
; IF-EVL-LABEL: @fmaximum(
1266
1299
; IF-EVL-NEXT: entry:
1300
+ ; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1301
+ ; IF-EVL: vector.ph:
1302
+ ; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], 7
1303
+ ; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
1304
+ ; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
1305
+ ; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1
1306
+ ; IF-EVL-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[START:%.*]], i64 0
1307
+ ; IF-EVL-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <8 x float> [[MINMAX_IDENT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
1308
+ ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
1309
+ ; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
1310
+ ; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]]
1311
+ ; IF-EVL: vector.body:
1312
+ ; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1313
+ ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
1314
+ ; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1315
+ ; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
1316
+ ; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
1317
+ ; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
1318
+ ; IF-EVL-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]]
1319
+ ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
1320
+ ; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
1321
+ ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison)
1322
+ ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_MASKED_LOAD]])
1323
+ ; IF-EVL-NEXT: [[TMP5:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[TMP4]], <8 x float> [[VEC_PHI]]
1324
+ ; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
1325
+ ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1326
+ ; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
1327
+ ; IF-EVL: middle.block:
1328
+ ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP5]])
1329
+ ; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1330
+ ; IF-EVL: scalar.ph:
1331
+ ; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1332
+ ; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[START]], [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1267
1333
; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
1268
1334
; IF-EVL: for.body:
1269
- ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0 , [[ENTRY:%.* ]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1270
- ; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.* ]], [[ENTRY ]] ], [ [[MAX:%.*]], [[FOR_BODY]] ]
1271
- ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.* ]], i64 [[IV]]
1272
- ; IF-EVL-NEXT: [[TMP0 :%.*]] = load float, ptr [[ARRAYIDX]], align 4
1273
- ; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0 ]])
1335
+ ; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]] , [[SCALAR_PH ]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
1336
+ ; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX ]], [[SCALAR_PH ]] ], [ [[MAX:%.*]], [[FOR_BODY]] ]
1337
+ ; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
1338
+ ; IF-EVL-NEXT: [[TMP8 :%.*]] = load float, ptr [[ARRAYIDX]], align 4
1339
+ ; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP8 ]])
1274
1340
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1275
- ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.* ]]
1276
- ; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.* ]], label [[FOR_BODY]], !llvm.loop [[LOOP4 ]]
1341
+ ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1342
+ ; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+ ]]
1277
1343
; IF-EVL: for.end:
1278
- ; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ]
1344
+ ; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1279
1345
; IF-EVL-NEXT: ret float [[MAX_LCSSA]]
1280
1346
;
1281
1347
; NO-VP-LABEL: @fmaximum(
@@ -1360,7 +1426,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
1360
1426
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP24]], [[EVL_BASED_IV]]
1361
1427
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
1362
1428
; IF-EVL-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1363
- ; IF-EVL-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26 :![0-9]+]]
1429
+ ; IF-EVL-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30 :![0-9]+]]
1364
1430
; IF-EVL: middle.block:
1365
1431
; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1366
1432
; IF-EVL: scalar.ph:
@@ -1377,7 +1443,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
1377
1443
; IF-EVL-NEXT: [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP26]], float [[TMP27]], float [[RDX]])
1378
1444
; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1379
1445
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1380
- ; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27 :![0-9]+]]
1446
+ ; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP31 :![0-9]+]]
1381
1447
; IF-EVL: for.end:
1382
1448
; IF-EVL-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
1383
1449
; IF-EVL-NEXT: ret float [[MULADD_LCSSA]]
0 commit comments