@@ -310,24 +310,49 @@ for.exit:
310
310
ret void
311
311
}
312
312
313
- ;; Need to support legalization of smaller int types.
314
313
define void @histogram_8bit(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
315
314
; CHECK-LABEL: define void @histogram_8bit(
316
315
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
317
316
; CHECK-NEXT: entry:
317
+ ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
318
+ ; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP5]], 2
319
+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP9]]
320
+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]]
321
+ ; CHECK: vector.ph:
322
+ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
323
+ ; CHECK-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP2]], -4
324
+ ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
325
+ ; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
326
+ ; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
318
327
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
319
- ; CHECK: for.body:
320
- ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
328
+ ; CHECK: vector.body:
329
+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
321
330
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
322
- ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
331
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[ARRAYIDX]], align 4
332
+ ; CHECK-NEXT: [[TMP6:%.*]] = zext <vscale x 4 x i32> [[WIDE_LOAD]] to <vscale x 4 x i64>
333
+ ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[BUCKETS]], <vscale x 4 x i64> [[TMP6]]
334
+ ; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i8(<vscale x 4 x ptr> [[TMP7]], i8 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
335
+ ; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP4]]
336
+ ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
337
+ ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
338
+ ; CHECK: middle.block:
339
+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
340
+ ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
341
+ ; CHECK: scalar.ph:
342
+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ]
343
+ ; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
344
+ ; CHECK: for.body:
345
+ ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ]
346
+ ; CHECK-NEXT: [[GEP_INDICES:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV1]]
347
+ ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_INDICES]], align 4
323
348
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
324
349
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[BUCKETS]], i64 [[IDXPROM1]]
325
350
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 4
326
351
; CHECK-NEXT: [[INC:%.*]] = add nsw i8 [[TMP1]], 1
327
352
; CHECK-NEXT: store i8 [[INC]], ptr [[ARRAYIDX2]], align 4
328
- ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
329
- ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
330
- ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
353
+ ; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
354
+ ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]]
355
+ ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP11:![0-9]+]]
331
356
; CHECK: for.exit:
332
357
; CHECK-NEXT: ret void
333
358
;
@@ -368,7 +393,7 @@ define void @histogram_float(ptr noalias %buckets, ptr readonly %indices, i64 %N
368
393
; CHECK-NEXT: store float [[INC]], ptr [[ARRAYIDX2]], align 4
369
394
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
370
395
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
371
- ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP10]]
396
+ ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
372
397
; CHECK: for.exit:
373
398
; CHECK-NEXT: ret void
374
399
;
@@ -411,7 +436,7 @@ define void @histogram_varying_increment(ptr noalias %buckets, ptr readonly %ind
411
436
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
412
437
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
413
438
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
414
- ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP10]]
439
+ ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP12]]
415
440
; CHECK: for.exit:
416
441
; CHECK-NEXT: ret void
417
442
;
@@ -469,7 +494,7 @@ define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly
469
494
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP21]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
470
495
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
471
496
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
472
- ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
497
+ ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
473
498
; CHECK: middle.block:
474
499
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
475
500
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
@@ -487,7 +512,7 @@ define void @simple_histogram_user_interleave(ptr noalias %buckets, ptr readonly
487
512
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
488
513
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
489
514
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
490
- ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
515
+ ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
491
516
; CHECK: for.exit:
492
517
; CHECK-NEXT: ret void
493
518
;
@@ -539,7 +564,7 @@ define void @histogram_array_3op_gep(i64 noundef %N) #0 {
539
564
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP11]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
540
565
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
541
566
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
542
- ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
567
+ ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
543
568
; CHECK: middle.block:
544
569
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
545
570
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
@@ -557,7 +582,7 @@ define void @histogram_array_3op_gep(i64 noundef %N) #0 {
557
582
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX6]], align 4
558
583
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
559
584
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
560
- ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
585
+ ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
561
586
; CHECK: for.exit:
562
587
; CHECK-NEXT: ret void
563
588
;
@@ -609,7 +634,7 @@ define void @histogram_array_4op_gep_nonzero_const_idx(i64 noundef %N, ptr reado
609
634
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv4p0.i32(<vscale x 4 x ptr> [[TMP7]], i32 1, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
610
635
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP4]]
611
636
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
612
- ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
637
+ ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
613
638
; CHECK: middle.block:
614
639
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
615
640
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
@@ -627,7 +652,7 @@ define void @histogram_array_4op_gep_nonzero_const_idx(i64 noundef %N, ptr reado
627
652
; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_BUCKET]], align 4
628
653
; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
629
654
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N]]
630
- ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP17:![0-9]+]]
655
+ ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY1]], !llvm.loop [[LOOP19:![0-9]+]]
631
656
; CHECK: for.exit:
632
657
; CHECK-NEXT: ret void
633
658
;
@@ -676,13 +701,13 @@ define void @simple_histogram_tailfold(ptr noalias %buckets, ptr readonly %indic
676
701
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
677
702
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP6]])
678
703
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i64 0
679
- ; CHECK-NEXT: br i1 [[TMP11]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP18:![0-9]+]]
704
+ ; CHECK-NEXT: br i1 [[TMP11]], label [[VECTOR_BODY]], label [[MIDDLE_BLOCK:%.*]], !llvm.loop [[LOOP20:![0-9]+]]
680
705
; CHECK: middle.block:
681
706
; CHECK-NEXT: br i1 true, label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
682
707
; CHECK: scalar.ph:
683
708
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
684
709
; CHECK: for.body:
685
- ; CHECK-NEXT: br i1 poison, label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
710
+ ; CHECK-NEXT: br i1 poison, label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
686
711
; CHECK: for.exit:
687
712
; CHECK-NEXT: ret void
688
713
;
@@ -749,7 +774,7 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr %
749
774
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
750
775
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
751
776
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
752
- ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
777
+ ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
753
778
; CHECK: middle.block:
754
779
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
755
780
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
@@ -770,7 +795,7 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr %
770
795
; CHECK-NEXT: store i32 [[IV_TRUNC]], ptr [[IDX_ADDR]], align 4
771
796
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
772
797
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
773
- ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
798
+ ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
774
799
; CHECK: for.exit:
775
800
; CHECK-NEXT: ret void
776
801
;
@@ -862,7 +887,7 @@ define void @simple_histogram_64b(ptr noalias %buckets, ptr readonly %indices, i
862
887
; CHECK-NEXT: call void @llvm.experimental.vector.histogram.add.nxv2p0.i64(<vscale x 2 x ptr> [[TMP6]], i64 1, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
863
888
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
864
889
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
865
- ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
890
+ ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
866
891
; CHECK: middle.block:
867
892
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
868
893
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
@@ -879,7 +904,7 @@ define void @simple_histogram_64b(ptr noalias %buckets, ptr readonly %indices, i
879
904
; CHECK-NEXT: store i64 [[INC]], ptr [[GEP_BUCKET]], align 4
880
905
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
881
906
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
882
- ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
907
+ ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
883
908
; CHECK: for.exit:
884
909
; CHECK-NEXT: ret void
885
910
;
0 commit comments