Skip to content

Commit 3fe6a06

Browse files
committed
[LV] Check if compare is truncated directly in getInstructionCost.
The current check for truncated compares in getInstructionCost misses cases where either the first operand or both operands are constants. Instead, check directly whether the compare itself is marked for truncation. In that case, the minimum bitwidth is that of the operands. The patch also adds asserts to ensure this invariant holds. This fixes a divergence between the legacy and VPlan-based cost models, where the legacy cost model incorrectly estimated the cost of compares with truncated operands. Fixes #107171.
1 parent 34f2c9a commit 3fe6a06

File tree

2 files changed

+64
-2
lines changed

2 files changed

+64
-2
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6606,9 +6606,20 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
66066606
case Instruction::ICmp:
66076607
case Instruction::FCmp: {
66086608
Type *ValTy = I->getOperand(0)->getType();
6609+
66096610
Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
6610-
if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
6611-
ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
6611+
(void)Op0AsInstruction;
6612+
assert((!canTruncateToMinimalBitwidth(Op0AsInstruction, VF) ||
6613+
canTruncateToMinimalBitwidth(I, VF)) &&
6614+
"truncating Op0 must imply truncating the compare");
6615+
if (canTruncateToMinimalBitwidth(I, VF)) {
6616+
assert(!canTruncateToMinimalBitwidth(Op0AsInstruction, VF) ||
6617+
MinBWs[I] == MinBWs[Op0AsInstruction] &&
6618+
"if both the operand and the compare are marked for "
6619+
"truncation, they must have the same bitwidth");
6620+
ValTy = IntegerType::get(ValTy->getContext(), MinBWs[I]);
6621+
}
6622+
66126623
VectorTy = ToVectorTy(ValTy, VF);
66136624
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr,
66146625
cast<CmpInst>(I)->getPredicate(), CostKind,

llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,56 @@ exit:
221221
ret void
222222
}
223223

224+
; Test case for https://github.com/llvm/llvm-project/issues/107171.
225+
define i8 @icmp_ops_narrowed_to_i1() #1 {
226+
; CHECK-LABEL: define i8 @icmp_ops_narrowed_to_i1(
227+
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
228+
; CHECK-NEXT: [[ENTRY:.*]]:
229+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
230+
; CHECK: [[VECTOR_PH]]:
231+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
232+
; CHECK: [[VECTOR_BODY]]:
233+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
234+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
235+
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
236+
; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
237+
; CHECK: [[MIDDLE_BLOCK]]:
238+
; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
239+
; CHECK: [[SCALAR_PH]]:
240+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 96, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
241+
; CHECK-NEXT: br label %[[LOOP:.*]]
242+
; CHECK: [[LOOP]]:
243+
; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
244+
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 0, 0
245+
; CHECK-NEXT: [[EXT:%.*]] = zext i1 [[C]] to i64
246+
; CHECK-NEXT: [[SHR:%.*]] = lshr i64 [[EXT]], 1
247+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[SHR]] to i8
248+
; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1
249+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV_NEXT]], 100
250+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
251+
; CHECK: [[EXIT]]:
252+
; CHECK-NEXT: [[TRUNC_LCSSA:%.*]] = phi i8 [ [[TRUNC]], %[[LOOP]] ], [ 0, %[[MIDDLE_BLOCK]] ]
253+
; CHECK-NEXT: ret i8 [[TRUNC_LCSSA]]
254+
;
255+
entry:
256+
br label %loop
257+
258+
loop:
259+
%iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ]
260+
%c = icmp eq i8 0, 0
261+
%ext = zext i1 %c to i64
262+
%shr = lshr i64 %ext, 1
263+
%trunc = trunc i64 %shr to i8
264+
%iv.next = add i16 %iv, 1
265+
%ec = icmp eq i16 %iv.next, 100
266+
br i1 %ec, label %exit, label %loop
267+
268+
exit:
269+
ret i8 %trunc
270+
}
271+
224272
attributes #0 = { "target-features"="+64bit,+v,+zvl256b" }
273+
attributes #1 = { "target-features"="+64bit,+v" }
225274

226275
;.
227276
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
@@ -232,4 +281,6 @@ attributes #0 = { "target-features"="+64bit,+v,+zvl256b" }
232281
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
233282
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
234283
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
284+
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
285+
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
235286
;.

0 commit comments

Comments
 (0)