@@ -373,21 +373,57 @@ exit:
373
373
ret void
374
374
}
375
375
376
- ; TODO: We should be able to sink %tmp38 after %tmp60 .
376
+ ; Sink %tmp38 after %tmp60, which enables loop vectorization.
377
377
define void @instruction_with_2_FOR_operands () {
378
378
; CHECK-LABEL: @instruction_with_2_FOR_operands(
379
379
; CHECK-NEXT: bb:
380
+ ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 undef, i64 0)
381
+ ; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[SMAX]], 1
382
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
383
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
384
+ ; CHECK: vector.ph:
385
+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
386
+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
387
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
388
+ ; CHECK: vector.body:
389
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
390
+ ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float undef>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT3:%.*]], [[VECTOR_BODY]] ]
391
+ ; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float undef>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
392
+ ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
393
+ ; CHECK-NEXT: [[TMP2:%.*]] = load float, float* undef, align 4
394
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i32 0
395
+ ; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
396
+ ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR1]], <4 x float> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
397
+ ; CHECK-NEXT: [[TMP4:%.*]] = load float, float* undef, align 4
398
+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP4]], i32 0
399
+ ; CHECK-NEXT: [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
400
+ ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
401
+ ; CHECK-NEXT: [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP3]]
402
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
403
+ ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
404
+ ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
405
+ ; CHECK: middle.block:
406
+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
407
+ ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT3]], i32 3
408
+ ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT3]], i32 2
409
+ ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT4:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i32 3
410
+ ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI5:%.*]] = extractelement <4 x float> [[BROADCAST_SPLAT]], i32 2
411
+ ; CHECK-NEXT: br i1 [[CMP_N]], label [[BB74:%.*]], label [[SCALAR_PH]]
412
+ ; CHECK: scalar.ph:
413
+ ; CHECK-NEXT: [[SCALAR_RECUR_INIT6:%.*]] = phi float [ undef, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT4]], [[MIDDLE_BLOCK]] ]
414
+ ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ undef, [[BB]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
415
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
380
416
; CHECK-NEXT: br label [[BB13:%.*]]
381
417
; CHECK: bb13:
382
- ; CHECK-NEXT: [[TMP37 :%.*]] = phi float [ [[TMP60:%.*]], [[BB13]] ], [ undef , [[BB:%.* ]] ]
383
- ; CHECK-NEXT: [[TMP27 :%.*]] = phi float [ [[TMP49:%.*]], [[BB13]] ], [ undef , [[BB ]] ]
384
- ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB13]] ], [ 0 , [[BB ]] ]
385
- ; CHECK-NEXT: [[TMP38:%.*]] = fmul fast float [[TMP37 ]], [[TMP27 ]]
418
+ ; CHECK-NEXT: [[SCALAR_RECUR :%.*]] = phi float [ [[TMP60:%.*]], [[BB13]] ], [ [[SCALAR_RECUR_INIT]] , [[SCALAR_PH ]] ]
419
+ ; CHECK-NEXT: [[SCALAR_RECUR7 :%.*]] = phi float [ [[TMP49:%.*]], [[BB13]] ], [ [[SCALAR_RECUR_INIT6]] , [[SCALAR_PH ]] ]
420
+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB13]] ], [ [[BC_RESUME_VAL]] , [[SCALAR_PH ]] ]
421
+ ; CHECK-NEXT: [[TMP38:%.*]] = fmul fast float [[SCALAR_RECUR ]], [[SCALAR_RECUR7 ]]
386
422
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
387
423
; CHECK-NEXT: [[TMP49]] = load float, float* undef, align 4
388
424
; CHECK-NEXT: [[TMP60]] = load float, float* undef, align 4
389
425
; CHECK-NEXT: [[TMP12:%.*]] = icmp slt i64 [[INDVARS_IV]], undef
390
- ; CHECK-NEXT: br i1 [[TMP12]], label [[BB13]], label [[BB74:%.* ]]
426
+ ; CHECK-NEXT: br i1 [[TMP12]], label [[BB13]], label [[BB74]], !llvm.loop [[LOOP9:![0-9]+ ]]
391
427
; CHECK: bb74:
392
428
; CHECK-NEXT: ret void
393
429
;
0 commit comments