Skip to content

Commit cf2ecc7

Browse files
committed
[LV] Remove over-aggressive assert from 3fe6a06.
There are some cases where only the first operand is marked for truncation. In that case, the compare itself won't be truncated, so the assertion would fire incorrectly. This also shows that the check prior to 3fe6a06 treated some compares as truncated even though they cannot be truncated.
1 parent 54194e1 commit cf2ecc7

File tree

2 files changed

+91
-5
lines changed

2 files changed

+91
-5
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6607,12 +6607,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
66076607
case Instruction::FCmp: {
66086608
Type *ValTy = I->getOperand(0)->getType();
66096609

6610-
Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
6611-
(void)Op0AsInstruction;
6612-
assert((!canTruncateToMinimalBitwidth(Op0AsInstruction, VF) ||
6613-
canTruncateToMinimalBitwidth(I, VF)) &&
6614-
"truncating Op0 must imply truncating the compare");
66156610
if (canTruncateToMinimalBitwidth(I, VF)) {
6611+
Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
6612+
(void)Op0AsInstruction;
66166613
assert(!canTruncateToMinimalBitwidth(Op0AsInstruction, VF) ||
66176614
MinBWs[I] == MinBWs[Op0AsInstruction] &&
66186615
"if both the operand and the compare are marked for "

llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,93 @@ exit:
269269
ret i8 %trunc
270270
}
271271

272+
define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64 %v, ptr noalias %src) #1 {
273+
; CHECK-LABEL: define void @icmp_only_first_op_truncated(
274+
; CHECK-SAME: ptr noalias [[DST:%.*]], i32 [[X:%.*]], i64 [[N:%.*]], i64 [[V:%.*]], ptr noalias [[SRC:%.*]]) #[[ATTR2]] {
275+
; CHECK-NEXT: [[ENTRY:.*]]:
276+
; CHECK-NEXT: [[T:%.*]] = trunc i64 [[N]] to i32
277+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[V]], 1
278+
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
279+
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2
280+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
281+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
282+
; CHECK: [[VECTOR_PH]]:
283+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
284+
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
285+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
286+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
287+
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
288+
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
289+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[N]], i64 0
290+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
291+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[T]], i64 0
292+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
293+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[DST]], i64 0
294+
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT5]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
295+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
296+
; CHECK: [[VECTOR_BODY]]:
297+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
298+
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i64> [[BROADCAST_SPLAT]] to <vscale x 2 x i32>
299+
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 2 x i32> [[TMP7]], [[BROADCAST_SPLAT2]]
300+
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[X]] to i64
301+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[SRC]], i64 [[TMP9]]
302+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[TMP10]], i64 0
303+
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT3]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
304+
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT4]], i32 8, <vscale x 2 x i1> [[TMP8]], <vscale x 2 x double> poison)
305+
; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[WIDE_MASKED_GATHER]], <vscale x 2 x ptr> [[BROADCAST_SPLAT6]], i32 8, <vscale x 2 x i1> [[TMP8]])
306+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
307+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
308+
; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
309+
; CHECK: [[MIDDLE_BLOCK]]:
310+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
311+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
312+
; CHECK: [[SCALAR_PH]]:
313+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
314+
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
315+
; CHECK: [[LOOP_HEADER]]:
316+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
317+
; CHECK-NEXT: [[T1:%.*]] = trunc i64 [[N]] to i32
318+
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[T1]], [[T]]
319+
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
320+
; CHECK: [[THEN]]:
321+
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[X]] to i64
322+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr double, ptr [[SRC]], i64 [[IDXPROM]]
323+
; CHECK-NEXT: [[RETVAL:%.*]] = load double, ptr [[ARRAYIDX]], align 8
324+
; CHECK-NEXT: store double [[RETVAL]], ptr [[DST]], align 8
325+
; CHECK-NEXT: br label %[[LOOP_LATCH]]
326+
; CHECK: [[LOOP_LATCH]]:
327+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
328+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[V]]
329+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
330+
; CHECK: [[EXIT]]:
331+
; CHECK-NEXT: ret void
332+
;
333+
entry:
334+
%t = trunc i64 %N to i32
335+
br label %loop.header
336+
337+
loop.header:
338+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
339+
%t1 = trunc i64 %N to i32
340+
%c = icmp eq i32 %t1, %t
341+
br i1 %c, label %then, label %loop.latch
342+
343+
then:
344+
%idxprom = zext i32 %x to i64
345+
%arrayidx = getelementptr double, ptr %src, i64 %idxprom
346+
%retval = load double, ptr %arrayidx, align 8
347+
store double %retval, ptr %dst, align 8
348+
br label %loop.latch
349+
350+
loop.latch:
351+
%iv.next = add i64 %iv, 1
352+
%ec = icmp eq i64 %iv, %v
353+
br i1 %ec, label %exit, label %loop.header
354+
355+
exit:
356+
ret void
357+
}
358+
272359
attributes #0 = { "target-features"="+64bit,+v,+zvl256b" }
273360
attributes #1 = { "target-features"="+64bit,+v" }
274361

@@ -283,4 +370,6 @@ attributes #1 = { "target-features"="+64bit,+v" }
283370
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
284371
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
285372
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
373+
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
374+
; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
286375
;.

0 commit comments

Comments
 (0)