@@ -497,49 +497,21 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
497
497
;
498
498
; TF-FIXEDLEN-LABEL: @conditional_uniform_load(
499
499
; TF-FIXEDLEN-NEXT: entry:
500
- ; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
501
- ; TF-FIXEDLEN: vector.ph:
502
- ; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0
503
- ; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer
504
- ; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]]
505
- ; TF-FIXEDLEN: vector.body:
506
- ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
507
- ; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
508
- ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
509
- ; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
510
- ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], <i64 10, i64 10, i64 10, i64 10>
511
- ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
512
- ; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison)
513
- ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP1]], <i1 true, i1 true, i1 true, i1 true>
514
- ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
515
- ; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP2]], [[TMP4]]
516
- ; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer
517
- ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
518
- ; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
519
- ; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP7]], i32 8, <4 x i1> [[TMP6]])
520
- ; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
521
- ; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
522
- ; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
523
- ; TF-FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
524
- ; TF-FIXEDLEN: middle.block:
525
- ; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
526
- ; TF-FIXEDLEN: scalar.ph:
527
- ; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
528
500
; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]]
529
501
; TF-FIXEDLEN: for.body:
530
- ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]] , [[SCALAR_PH ]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
502
+ ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 0 , [[ENTRY:%.* ]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ]
531
503
; TF-FIXEDLEN-NEXT: [[CMP:%.*]] = icmp ugt i64 [[IV]], 10
532
504
; TF-FIXEDLEN-NEXT: br i1 [[CMP]], label [[DO_LOAD:%.*]], label [[LATCH]]
533
505
; TF-FIXEDLEN: do_load:
534
- ; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
506
+ ; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B:%.* ]], align 8
535
507
; TF-FIXEDLEN-NEXT: br label [[LATCH]]
536
508
; TF-FIXEDLEN: latch:
537
509
; TF-FIXEDLEN-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ [[V]], [[DO_LOAD]] ]
538
- ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
510
+ ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.* ]], i64 [[IV]]
539
511
; TF-FIXEDLEN-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8
540
512
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
541
513
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
542
- ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+ ]]
514
+ ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.* ]], label [[FOR_BODY]]
543
515
; TF-FIXEDLEN: for.end:
544
516
; TF-FIXEDLEN-NEXT: ret void
545
517
;
@@ -708,7 +680,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
708
680
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
709
681
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
710
682
; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
711
- ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6 :![0-9]+]]
683
+ ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4 :![0-9]+]]
712
684
; TF-FIXEDLEN: middle.block:
713
685
; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
714
686
; TF-FIXEDLEN: scalar.ph:
@@ -721,7 +693,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt
721
693
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
722
694
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
723
695
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
724
- ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7 :![0-9]+]]
696
+ ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5 :![0-9]+]]
725
697
; TF-FIXEDLEN: for.end:
726
698
; TF-FIXEDLEN-NEXT: ret void
727
699
;
@@ -883,7 +855,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
883
855
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
884
856
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
885
857
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
886
- ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8 :![0-9]+]]
858
+ ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6 :![0-9]+]]
887
859
; TF-FIXEDLEN: middle.block:
888
860
; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
889
861
; TF-FIXEDLEN: scalar.ph:
@@ -896,7 +868,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
896
868
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
897
869
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
898
870
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
899
- ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9 :![0-9]+]]
871
+ ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7 :![0-9]+]]
900
872
; TF-FIXEDLEN: for.end:
901
873
; TF-FIXEDLEN-NEXT: ret void
902
874
;
@@ -1114,7 +1086,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
1114
1086
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
1115
1087
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1116
1088
; TF-FIXEDLEN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
1117
- ; TF-FIXEDLEN-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10 :![0-9]+]]
1089
+ ; TF-FIXEDLEN-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8 :![0-9]+]]
1118
1090
; TF-FIXEDLEN: middle.block:
1119
1091
; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1120
1092
; TF-FIXEDLEN: scalar.ph:
@@ -1127,7 +1099,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias
1127
1099
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
1128
1100
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1129
1101
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
1130
- ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11 :![0-9]+]]
1102
+ ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9 :![0-9]+]]
1131
1103
; TF-FIXEDLEN: for.end:
1132
1104
; TF-FIXEDLEN-NEXT: ret void
1133
1105
;
@@ -1353,7 +1325,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
1353
1325
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1354
1326
; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
1355
1327
; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
1356
- ; TF-FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12 :![0-9]+]]
1328
+ ; TF-FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10 :![0-9]+]]
1357
1329
; TF-FIXEDLEN: middle.block:
1358
1330
; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1359
1331
; TF-FIXEDLEN: scalar.ph:
@@ -1371,7 +1343,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
1371
1343
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
1372
1344
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1373
1345
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
1374
- ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13 :![0-9]+]]
1346
+ ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11 :![0-9]+]]
1375
1347
; TF-FIXEDLEN: for.end:
1376
1348
; TF-FIXEDLEN-NEXT: ret void
1377
1349
;
@@ -1539,7 +1511,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
1539
1511
; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
1540
1512
; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1541
1513
; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
1542
- ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14 :![0-9]+]]
1514
+ ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12 :![0-9]+]]
1543
1515
; TF-FIXEDLEN: middle.block:
1544
1516
; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1545
1517
; TF-FIXEDLEN: scalar.ph:
@@ -1552,7 +1524,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap
1552
1524
; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
1553
1525
; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1554
1526
; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025
1555
- ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15 :![0-9]+]]
1527
+ ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13 :![0-9]+]]
1556
1528
; TF-FIXEDLEN: for.end:
1557
1529
; TF-FIXEDLEN-NEXT: ret void
1558
1530
;
0 commit comments