Skip to content

Commit f937b17

Browse files
committed
[LV] Don't query SCEV for non-invariant values in cost model.
This fixes a divergence between the VPlan-based and legacy cost models, matching the behavior further up in getInstructionCost as well. Fixes #129236.
1 parent fa5db05 commit f937b17

File tree

2 files changed

+67
-1
lines changed

2 files changed

+67
-1
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6673,7 +6673,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
66736673
// Certain instructions can be cheaper to vectorize if they have a constant
66746674
// second vector operand. One example of this are shifts on x86.
66756675
Value *Op2 = I->getOperand(1);
6676-
if (!isa<Constant>(Op2) && PSE.getSE()->isSCEVable(Op2->getType()) &&
6676+
if (!isa<Constant>(Op2) && TheLoop->isLoopInvariant(Op2) &&
6677+
PSE.getSE()->isSCEVable(Op2->getType()) &&
66776678
isa<SCEVConstant>(PSE.getSCEV(Op2))) {
66786679
Op2 = cast<SCEVConstant>(PSE.getSCEV(Op2))->getValue();
66796680
}

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1335,6 +1335,71 @@ exit:
13351335
ret i32 %select.next
13361336
}
13371337

1338+
; Test for https://github.com/llvm/llvm-project/issues/129236.
; The second operand (shift amount) of the ashr in block `then` is %p.1, a phi
; that is defined inside the loop, so it is not an out-of-loop value in the IR.
; %cmp.i compares the constants 0 and 0, so as written loop.header always
; branches to `then`, where %p.1 receives 0 from that edge; the test name
; suggests SCEV can therefore still treat the operand as invariant (NOTE: this
; is inferred from the name and constants, confirm against the linked issue).
; The autogenerated assertions below expect the scalar loop to be kept, entered
; directly from the entry block.
1339+
define i32 @cost_ashr_with_op_known_invariant_via_scev(i8 %a) {
1340+
; CHECK-LABEL: @cost_ashr_with_op_known_invariant_via_scev(
1341+
; CHECK-NEXT: entry:
1342+
; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i16 0, 0
1343+
; CHECK-NEXT: [[CONV_I:%.*]] = sext i16 0 to i32
1344+
; CHECK-NEXT: [[CONV5_I:%.*]] = sext i8 [[A:%.*]] to i32
1345+
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
1346+
; CHECK: loop.header:
1347+
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1348+
; CHECK-NEXT: br i1 [[CMP_I]], label [[THEN:%.*]], label [[ELSE:%.*]]
1349+
; CHECK: then:
1350+
; CHECK-NEXT: [[P_1:%.*]] = phi i32 [ [[REM_I:%.*]], [[ELSE]] ], [ 0, [[LOOP_HEADER]] ]
1351+
; CHECK-NEXT: [[SHR_I:%.*]] = ashr i32 [[CONV5_I]], [[P_1]]
1352+
; CHECK-NEXT: [[TOBOOL6_NOT_I:%.*]] = icmp eq i32 [[SHR_I]], 0
1353+
; CHECK-NEXT: [[SEXT_I:%.*]] = shl i32 [[P_1]], 24
1354+
; CHECK-NEXT: [[TMP0:%.*]] = ashr exact i32 [[SEXT_I]], 24
1355+
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TOBOOL6_NOT_I]], i32 [[TMP0]], i32 0
1356+
; CHECK-NEXT: br label [[LOOP_LATCH]]
1357+
; CHECK: else:
1358+
; CHECK-NEXT: [[REM_I]] = urem i32 -1, [[CONV_I]]
1359+
; CHECK-NEXT: [[CMP3_I:%.*]] = icmp sgt i32 [[REM_I]], 1
1360+
; CHECK-NEXT: br i1 [[CMP3_I]], label [[LOOP_LATCH]], label [[THEN]]
1361+
; CHECK: loop.latch:
1362+
; CHECK-NEXT: [[P_2:%.*]] = phi i32 [ 0, [[ELSE]] ], [ [[TMP1]], [[THEN]] ]
1363+
; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], -1
1364+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], 0
1365+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_HEADER]]
1366+
; CHECK: exit:
1367+
; CHECK-NEXT: [[P_2_LCSSA:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ]
1368+
; CHECK-NEXT: ret i32 [[P_2_LCSSA]]
1369+
;
1370+
entry:
1371+
%cmp.i = icmp eq i16 0, 0
1372+
%conv.i = sext i16 0 to i32
1373+
%conv5.i = sext i8 %a to i32
1374+
br label %loop.header
1375+
1376+
loop.header:
1377+
%iv = phi i8 [ 100, %entry ], [ %iv.next, %loop.latch ]
1378+
br i1 %cmp.i, label %then, label %else
1379+
1380+
then:
1381+
%p.1 = phi i32 [ %rem.i, %else ], [ 0, %loop.header ]
1382+
%shr.i = ashr i32 %conv5.i, %p.1
1383+
%tobool6.not.i = icmp eq i32 %shr.i, 0
1384+
%sext.i = shl i32 %p.1, 24
1385+
%2 = ashr exact i32 %sext.i, 24
1386+
%3 = select i1 %tobool6.not.i, i32 %2, i32 0
1387+
br label %loop.latch
1388+
1389+
else:
1390+
%rem.i = urem i32 -1, %conv.i
1391+
%cmp3.i = icmp sgt i32 %rem.i, 1
1392+
br i1 %cmp3.i, label %loop.latch, label %then
1393+
1394+
loop.latch:
1395+
%p.2 = phi i32 [ 0, %else ], [ %3, %then ]
1396+
%iv.next = add i8 %iv, -1
1397+
%ec = icmp eq i8 %iv.next, 0
1398+
br i1 %ec, label %exit, label %loop.header
1399+
1400+
exit:
1401+
ret i32 %p.2
1402+
}
13381403
declare void @llvm.assume(i1 noundef) #0
13391404

13401405
attributes #0 = { "target-cpu"="penryn" }

0 commit comments

Comments
 (0)