Skip to content

Commit cd43483

Browse files
committed
[VPlan] Skip cases where no truncate is needed in truncateMinimalBWs.
MinBWs contains entries that specify the minimum required bitwidth. In some cases, the old and new bitwidths can be equal (see test case); no truncation is needed then, so skip those entries. Fixes #74307.
1 parent e309667 commit cd43483

File tree

2 files changed

+107
-27
lines changed

2 files changed

+107
-27
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,8 @@ void VPlanTransforms::truncateToMinimalBitwidths(
935935
Type *OldResTy = TypeInfo.inferScalarType(ResultVPV);
936936
unsigned OldResSizeInBits = OldResTy->getScalarSizeInBits();
937937
assert(OldResTy->isIntegerTy() && "only integer types supported");
938+
if (OldResSizeInBits == NewResSizeInBits)
939+
continue;
938940
assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
939941
(void)OldResSizeInBits;
940942

llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll

Lines changed: 105 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -160,49 +160,49 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
160160
; CHECK: vector.main.loop.iter.check:
161161
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
162162
; CHECK: vector.ph:
163-
; CHECK-NEXT: [[T:%.*]] = trunc i32 [[A]] to i16
164-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> undef, i16 [[T]], i64 0
165-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
166-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i16> undef, i16 [[B]], i64 0
167-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT1]], <16 x i16> poison, <16 x i32> zeroinitializer
163+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[A]] to i16
164+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> undef, i16 [[TMP0]], i64 0
165+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> zeroinitializer
166+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i16> undef, i16 [[B]], i64 0
167+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> zeroinitializer
168168
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
169169
; CHECK: vector.body:
170170
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
171-
; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i16> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
172-
; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
173-
; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP4]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
171+
; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[TMP2]], [[TMP4]]
172+
; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i16> [[TMP2]], [[TMP4]]
174173
; CHECK-NEXT: [[TMP7:%.*]] = lshr <16 x i16> [[TMP5]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
175-
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i16> [[TMP6]] to <16 x i8>
174+
; CHECK-NEXT: [[TMP8:%.*]] = lshr <16 x i16> [[TMP6]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
176175
; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8>
177-
; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[INDEX]] to i64
178-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP10]]
179-
; CHECK-NEXT: store <16 x i8> [[TMP8]], ptr [[TMP11]], align 1
180-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16
176+
; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i16> [[TMP8]] to <16 x i8>
177+
; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[INDEX]] to i64
178+
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]]
181179
; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP12]], align 1
180+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16
181+
; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP13]], align 1
182182
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
183-
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
184-
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
183+
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
184+
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
185185
; CHECK: middle.block:
186186
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
187187
; CHECK: vec.epilog.iter.check:
188188
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
189189
; CHECK: vec.epilog.ph:
190-
; CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[A]] to i16
191-
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i16> undef, i16 [[TMP14]], i64 0
192-
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i16> undef, i16 [[B]], i64 0
190+
; CHECK-NEXT: [[TMP15:%.*]] = trunc i32 [[A]] to i16
191+
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i16> undef, i16 [[TMP15]], i64 0
192+
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x i16> undef, i16 [[B]], i64 0
193193
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
194194
; CHECK: vec.epilog.vector.body:
195195
; CHECK-NEXT: [[INDEX7:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
196-
; CHECK-NEXT: [[TMP17:%.*]] = mul <8 x i16> [[TMP15]], [[TMP16]]
197-
; CHECK-NEXT: [[TMP18:%.*]] = lshr <8 x i16> [[TMP17]], <i16 8, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
198-
; CHECK-NEXT: [[TMP19:%.*]] = trunc <8 x i16> [[TMP18]] to <8 x i8>
199-
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x i8> [[TMP19]], <8 x i8> poison, <8 x i32> zeroinitializer
200-
; CHECK-NEXT: [[TMP21:%.*]] = sext i32 [[INDEX7]] to i64
201-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP21]]
202-
; CHECK-NEXT: store <8 x i8> [[TMP20]], ptr [[TMP22]], align 1
196+
; CHECK-NEXT: [[TMP18:%.*]] = mul <8 x i16> [[TMP16]], [[TMP17]]
197+
; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
198+
; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i16> [[TMP19]] to <8 x i8>
199+
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> poison, <8 x i32> zeroinitializer
200+
; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[INDEX7]] to i64
201+
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP22]]
202+
; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr [[TMP23]], align 1
203203
; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i32 [[INDEX7]], 8
204-
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT8]], 1000
205-
; CHECK-NEXT: br i1 [[TMP23]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
204+
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT8]], 1000
205+
; CHECK-NEXT: br i1 [[TMP24]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
206206
; CHECK: vec.epilog.middle.block:
207207
; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
208208
; CHECK: vec.epilog.scalar.ph:
@@ -386,6 +386,35 @@ exit:
386386

387387
; Test case for #74231.
388388
define void @replicate_operands_in_with_operands_in_minbws(ptr %dst, ptr noalias %src.1, ptr noalias %src.2, i32 %x) {
389+
; CHECK-LABEL: define void @replicate_operands_in_with_operands_in_minbws
390+
; CHECK-SAME: (ptr [[DST:%.*]], ptr noalias [[SRC_1:%.*]], ptr noalias [[SRC_2:%.*]], i32 [[X:%.*]]) {
391+
; CHECK-NEXT: entry:
392+
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[X]], 65526
393+
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
394+
; CHECK: loop.header:
395+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
396+
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i64 [[IV]]
397+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
398+
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i32 [[L]], 10
399+
; CHECK-NEXT: br i1 [[C_1]], label [[LOOP_LATCH]], label [[IF_THEN:%.*]]
400+
; CHECK: if.then:
401+
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i16, ptr [[SRC_2]], i64 [[IV]]
402+
; CHECK-NEXT: [[L_2:%.*]] = load i16, ptr [[GEP_SRC_2]], align 2
403+
; CHECK-NEXT: [[C_2:%.*]] = icmp ult i16 [[L_2]], 100
404+
; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[L_2]] to i32
405+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C_2]], i32 [[SUB]], i32 [[CONV]]
406+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[SEL]] to i16
407+
; CHECK-NEXT: [[TRUNC:%.*]] = add i16 [[L_2]], [[TMP0]]
408+
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
409+
; CHECK-NEXT: store i16 [[TRUNC]], ptr [[GEP_DST]], align 2
410+
; CHECK-NEXT: br label [[LOOP_LATCH]]
411+
; CHECK: loop.latch:
412+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
413+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
414+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[EXIT:%.*]], label [[LOOP_HEADER]]
415+
; CHECK: exit:
416+
; CHECK-NEXT: ret void
417+
;
389418
entry:
390419
%sub = sub i32 %x, 10
391420
br label %loop.header
@@ -417,3 +446,52 @@ loop.latch:
417446
exit:
418447
ret void
419448
}
449+
450+
; Test case for #74307.
451+
define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) {
452+
; CHECK-LABEL: define void @old_and_new_size_equalko
453+
; CHECK-SAME: (ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
454+
; CHECK-NEXT: entry:
455+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
456+
; CHECK: vector.ph:
457+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
458+
; CHECK: vector.body:
459+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
460+
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
461+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
462+
; CHECK-NEXT: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP1]], align 4
463+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
464+
; CHECK-NEXT: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP2]], align 4
465+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
466+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
467+
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
468+
; CHECK: middle.block:
469+
; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
470+
; CHECK: scalar.ph:
471+
; CHECK-NEXT: br label [[LOOP:%.*]]
472+
; CHECK: loop:
473+
; CHECK-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
474+
; CHECK: exit:
475+
; CHECK-NEXT: ret void
476+
;
477+
entry:
478+
br label %loop
479+
480+
loop:
481+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
482+
%gep.src = getelementptr inbounds i64, ptr %src, i32 %iv
483+
%l = load i64, ptr %gep.src
484+
%cmp = icmp sle i64 %l, 1
485+
%ext = zext i1 %cmp to i64
486+
%cmp3 = icmp sle i64 %ext, -10
487+
%or = or i64 1, %ext
488+
%trunc = trunc i64 %or to i32
489+
%gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
490+
store i32 %trunc, ptr %gep.dst, align 4
491+
%iv.next = add i32 %iv, 1
492+
%ec = icmp eq i32 %iv.next, 1000
493+
br i1 %ec, label %exit, label %loop
494+
495+
exit:
496+
ret void
497+
}

0 commit comments

Comments
 (0)