@@ -1106,6 +1106,89 @@ for.end:
1106
1106
ret float %muladd
1107
1107
}
1108
1108
1109
; Conditional fmuladd reduction: the accumulator is only updated on the `else`
; path (taken when %c is false) and is carried through unchanged on the `if`
; path, so the latch phi %sum.next blends %sum with %muladd.
; The CHECK lines expect a 4-wide vector loop that keeps a scalar float
; accumulator phi, splits the fmuladd into an fmul followed by an fadd, and
; applies the predicate by selecting -0.0 for every lane in which the inverted
; %c mask is false before calling llvm.vector.reduce.fadd.
+ define float @reduction_fmuladd_blend (ptr %a , ptr %b , i64 %n , i1 %c ) {
1110
+ ; CHECK-LABEL: @reduction_fmuladd_blend(
1111
+ ; CHECK-NEXT: entry:
1112
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
1113
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1114
+ ; CHECK: vector.ph:
1115
+ ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4
1116
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0
1117
+ ; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison, i1 poison, i1 poison>
1118
+ ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[TMP0]], <4 x i1> poison, <4 x i32> zeroinitializer
1119
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1120
+ ; CHECK: vector.body:
1121
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1122
+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
1123
+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
1124
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
1125
+ ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
1126
+ ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
1127
+ ; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
1128
+ ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[TMP4]], <4 x float> splat (float -0.000000e+00)
1129
+ ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
1130
+ ; CHECK-NEXT: [[TMP7]] = fadd float [[TMP6]], [[VEC_PHI]]
1131
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1132
+ ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1133
+ ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
1134
+ ; CHECK: middle.block:
1135
+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1136
+ ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1137
+ ; CHECK: scalar.ph:
1138
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1139
+ ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
1140
+ ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
1141
+ ; CHECK: loop.header:
1142
+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
1143
+ ; CHECK-NEXT: [[SUM:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[LATCH]] ]
1144
+ ; CHECK-NEXT: br i1 [[C]], label [[FOO:%.*]], label [[BAR:%.*]]
1145
+ ; CHECK: if:
1146
+ ; CHECK-NEXT: br label [[LATCH]]
1147
+ ; CHECK: else:
1148
+ ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
1149
+ ; CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
1150
+ ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
1151
+ ; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1152
+ ; CHECK-NEXT: [[MULADD:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP10]], float [[TMP9]], float [[SUM]])
1153
+ ; CHECK-NEXT: br label [[LATCH]]
1154
+ ; CHECK: latch:
1155
+ ; CHECK-NEXT: [[SUM_NEXT]] = phi float [ [[SUM]], [[FOO]] ], [ [[MULADD]], [[BAR]] ]
1156
+ ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1157
+ ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
1158
+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
1159
+ ; CHECK: exit:
1160
+ ; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi float [ [[SUM_NEXT]], [[LATCH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1161
+ ; CHECK-NEXT: ret float [[SUM_NEXT_LCSSA]]
1162
+ ;
1163
+ entry:
1164
+ br label %loop.header
1165
+
1166
+ loop.header:
1167
+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %latch ]
1168
+ %sum = phi float [ 0 .000000e+00 , %entry ], [ %sum.next , %latch ]
1169
+ %arrayidx = getelementptr inbounds float , ptr %a , i64 %iv
1170
+ %0 = load float , ptr %arrayidx , align 4
1171
+ %arrayidx2 = getelementptr inbounds float , ptr %b , i64 %iv
1172
+ %1 = load float , ptr %arrayidx2 , align 4
1173
+ br i1 %c , label %if , label %else
1174
+
1175
+ if:
1176
+ br label %latch
1177
+
1178
+ else:
1179
+ %muladd = tail call float @llvm.fmuladd.f32 (float %0 , float %1 , float %sum )
1180
+ br label %latch
1181
+
1182
+ latch:
1183
+ %sum.next = phi float [ %sum , %if ], [ %muladd , %else ]
1184
+ %iv.next = add nuw nsw i64 %iv , 1
1185
+ %exitcond.not = icmp eq i64 %iv.next , %n
1186
+ br i1 %exitcond.not , label %exit , label %loop.header
1187
+
1188
+ exit:
1189
+ ret float %sum.next
1190
+ }
1191
+
1109
1192
; This case was previously failing verification due to the mask for the
1110
1193
; reduction being created after the reduction.
1111
1194
define i32 @predicated_not_dominates_reduction (ptr nocapture noundef readonly %h , i32 noundef %i ) {
@@ -1130,7 +1213,7 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h
1130
1213
; CHECK-NEXT: [[TMP7]] = add i32 [[TMP6]], [[VEC_PHI]]
1131
1214
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1132
1215
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
1133
- ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38 :![0-9]+]]
1216
+ ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40 :![0-9]+]]
1134
1217
; CHECK: middle.block:
1135
1218
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I]], [[N_VEC]]
1136
1219
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END7:%.*]], label [[SCALAR_PH]]
@@ -1157,7 +1240,7 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h
1157
1240
; CHECK-NEXT: [[G_1]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[G_016]], [[FOR_BODY2]] ]
1158
1241
; CHECK-NEXT: [[INC6]] = add nuw nsw i32 [[A_117]], 1
1159
1242
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC6]], [[I]]
1160
- ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END7]], label [[FOR_BODY2]], !llvm.loop [[LOOP39 :![0-9]+]]
1243
+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END7]], label [[FOR_BODY2]], !llvm.loop [[LOOP41 :![0-9]+]]
1161
1244
; CHECK: for.end7:
1162
1245
; CHECK-NEXT: [[G_1_LCSSA:%.*]] = phi i32 [ [[G_1]], [[FOR_INC5]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1163
1246
; CHECK-NEXT: ret i32 [[G_1_LCSSA]]
@@ -1219,7 +1302,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read
1219
1302
; CHECK-NEXT: [[TMP11]] = add i32 [[TMP10]], [[TMP8]]
1220
1303
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1221
1304
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
1222
- ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40 :![0-9]+]]
1305
+ ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42 :![0-9]+]]
1223
1306
; CHECK: middle.block:
1224
1307
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I]], [[N_VEC]]
1225
1308
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END7:%.*]], label [[SCALAR_PH]]
@@ -1247,7 +1330,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read
1247
1330
; CHECK-NEXT: [[G_1]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[G_016]], [[FOR_BODY2]] ]
1248
1331
; CHECK-NEXT: [[INC6]] = add nuw nsw i32 [[A_117]], 1
1249
1332
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC6]], [[I]]
1250
- ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END7]], label [[FOR_BODY2]], !llvm.loop [[LOOP41 :![0-9]+]]
1333
+ ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END7]], label [[FOR_BODY2]], !llvm.loop [[LOOP43 :![0-9]+]]
1251
1334
; CHECK: for.end7:
1252
1335
; CHECK-NEXT: [[G_1_LCSSA:%.*]] = phi i32 [ [[G_1]], [[FOR_INC5]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
1253
1336
; CHECK-NEXT: ret i32 [[G_1_LCSSA]]
@@ -1362,7 +1445,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
1362
1445
; CHECK-NEXT: [[TMP48]] = add i32 [[VEC_PHI]], [[TMP47]]
1363
1446
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1364
1447
; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
1365
- ; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42 :![0-9]+]]
1448
+ ; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44 :![0-9]+]]
1366
1449
; CHECK: middle.block:
1367
1450
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
1368
1451
; CHECK: scalar.ph:
@@ -1377,7 +1460,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
1377
1460
; CHECK: if.then:
1378
1461
; CHECK-NEXT: br label [[FOR_INC]]
1379
1462
; CHECK: for.inc:
1380
- ; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP43 :![0-9]+]]
1463
+ ; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP45 :![0-9]+]]
1381
1464
;
1382
1465
entry:
1383
1466
br label %for.body
0 commit comments