Skip to content

Commit 0dacba3

Browse files
committed
[VPlan] Handle truncating ICMPs in truncateToMinimalBWs.
Update truncateToMinimalBitwidths to handle truncating ICmps. For ICmps, the new target type will be the same as the original type. In that case, only truncate the operands and skip extending the result. This matches what the original truncateToMinimalBitwidths did for compares. Fixes #81415.
1 parent 9e5da05 commit 0dacba3

File tree

2 files changed

+76
-8
lines changed

2 files changed

+76
-8
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -977,9 +977,6 @@ void VPlanTransforms::truncateToMinimalBitwidths(
977977
Type *OldResTy = TypeInfo.inferScalarType(ResultVPV);
978978
unsigned OldResSizeInBits = OldResTy->getScalarSizeInBits();
979979
assert(OldResTy->isIntegerTy() && "only integer types supported");
980-
if (OldResSizeInBits == NewResSizeInBits)
981-
continue;
982-
assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
983980
(void)OldResSizeInBits;
984981

985982
auto *NewResTy = IntegerType::get(Ctx, NewResSizeInBits);
@@ -990,11 +987,17 @@ void VPlanTransforms::truncateToMinimalBitwidths(
990987
if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R))
991988
VPW->dropPoisonGeneratingFlags();
992989

993-
// Extend result to original width.
994-
auto *Ext = new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy);
995-
Ext->insertAfter(&R);
996-
ResultVPV->replaceAllUsesWith(Ext);
997-
Ext->setOperand(0, ResultVPV);
990+
if (OldResSizeInBits != NewResSizeInBits) {
991+
// Extend result to original width.
992+
auto *Ext =
993+
new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy);
994+
Ext->insertAfter(&R);
995+
ResultVPV->replaceAllUsesWith(Ext);
996+
Ext->setOperand(0, ResultVPV);
997+
assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
998+
} else
999+
assert(cast<VPWidenRecipe>(&R)->getOpcode() == Instruction::ICmp &&
1000+
"Only ICmps should not need extending the result.");
9981001

9991002
if (isa<VPWidenMemoryInstructionRecipe>(&R)) {
10001003
assert(!cast<VPWidenMemoryInstructionRecipe>(&R)->isStore() && "stores cannot be narrowed");

llvm/test/Transforms/LoopVectorize/trunc-reductions.ll

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,3 +245,68 @@ for.end:
245245
%ret = trunc i32 %min to i16
246246
ret i16 %ret
247247
}
248+
249+
; Test case for https://github.com/llvm/llvm-project/issues/81415.
250+
define i32 @reduction_and_or(i16 %a, i32 %b, ptr %src) {
251+
; CHECK-LABEL: @reduction_and_or(
252+
; CHECK-NEXT: entry:
253+
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
254+
; CHECK: vector.ph:
255+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
256+
; CHECK: vector.body:
257+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
258+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ <i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
259+
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
260+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP0]]
261+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4
262+
; CHECK-NEXT: [[TMP2]] = or <8 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
263+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
264+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
265+
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
266+
; CHECK: middle.block:
267+
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP2]])
268+
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
269+
; CHECK: scalar.ph:
270+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
271+
; CHECK-NEXT: br label [[LOOP:%.*]]
272+
; CHECK: loop:
273+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 992, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
274+
; CHECK-NEXT: [[OR67:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[LOOP]] ]
275+
; CHECK-NEXT: [[TMP5:%.*]] = zext nneg i32 [[IV]] to i64
276+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP5]]
277+
; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4
278+
; CHECK-NEXT: [[OR]] = or i32 [[OR67]], [[L]]
279+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
280+
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 999
281+
; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
282+
; CHECK: exit:
283+
; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[LOOP]] ], [ poison, [[MIDDLE_BLOCK]] ]
284+
; CHECK-NEXT: ret i32 [[OR_LCSSA]]
285+
;
286+
entry:
287+
%ext1 = zext i16 %a to i32
288+
br label %loop
289+
290+
loop:
291+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
292+
%or67 = phi i32 [ 10, %entry ], [ %or, %loop ]
293+
%t = trunc i32 %b to i16
294+
%ext = sext i16 %t to i32
295+
%cmp = icmp sgt i32 %ext, %ext1
296+
%ext2 = zext i1 %cmp to i32
297+
%cmp3 = icmp sge i32 %iv, %ext2
298+
%ext4 = zext i1 %cmp3 to i32
299+
%div = sdiv i32 %ext4, %b
300+
%and = and i32 %div, 0
301+
%gep = getelementptr inbounds i32, ptr %src, i32 %iv
302+
%l = load i32, ptr %gep
303+
%add = add i32 %and, %l
304+
%or = or i32 %or67, %add
305+
%iv.next = add nsw i32 %iv, 1
306+
%tobool.not = icmp eq i32 %iv.next, 999
307+
br i1 %tobool.not, label %exit, label %loop
308+
309+
exit:
310+
%or.lcssa = phi i32 [ %or, %loop ]
311+
ret i32 %or.lcssa
312+
}

0 commit comments

Comments
 (0)