Skip to content

Commit e5718ce

Browse files
rajatbajpaiyuxuanchen1997
authored and committed
[CVP][LVI] Add support for InsertElementInst in LVI (#99368)
Summary: Currently, the LVI analysis pass doesn't support the InsertElementInst vector instruction. Due to this, some optimization opportunities are missed. For example, in the example below, the ICMP instruction could be folded, but it isn't. ``` ... %ie1 = insertelement <2 x i32> poison, i32 10, i64 0 %ie2 = insertelement <2 x i32> %ie1, i32 20, i64 1 %icmp = icmp slt <2 x i32> %ie2, <i32 40, i32 40> ... ``` This change adds InsertElementInst support to the LVI analysis pass to fix the motivating example. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251386
1 parent e1e6e78 commit e5718ce

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

llvm/lib/Analysis/LazyValueInfo.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,8 @@ class LazyValueInfoImpl {
428428
std::optional<ValueLatticeElement> solveBlockValueIntrinsic(IntrinsicInst *II,
429429
BasicBlock *BB);
430430
std::optional<ValueLatticeElement>
431+
solveBlockValueInsertElement(InsertElementInst *IEI, BasicBlock *BB);
432+
std::optional<ValueLatticeElement>
431433
solveBlockValueExtractValue(ExtractValueInst *EVI, BasicBlock *BB);
432434
bool isNonNullAtEndOfBlock(Value *Val, BasicBlock *BB);
433435
void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
@@ -657,6 +659,9 @@ LazyValueInfoImpl::solveBlockValueImpl(Value *Val, BasicBlock *BB) {
657659
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI))
658660
return solveBlockValueBinaryOp(BO, BB);
659661

662+
if (auto *IEI = dyn_cast<InsertElementInst>(BBI))
663+
return solveBlockValueInsertElement(IEI, BB);
664+
660665
if (auto *EVI = dyn_cast<ExtractValueInst>(BBI))
661666
return solveBlockValueExtractValue(EVI, BB);
662667

@@ -1038,6 +1043,24 @@ LazyValueInfoImpl::solveBlockValueIntrinsic(IntrinsicInst *II, BasicBlock *BB) {
10381043
MetadataVal);
10391044
}
10401045

1046+
/// Compute the lattice value of an insertelement instruction in \p BB.
///
/// The result is the merge of the lattice value of the inserted element
/// (operand 1) and the lattice value of the source vector (operand 0).
/// The insertion index (operand 2) is not consulted, so the returned value
/// covers both the new element and whatever the source vector may hold.
///
/// \returns std::nullopt if getBlockValue() yields no value for either
/// operand; otherwise the merged lattice element.
std::optional<ValueLatticeElement>
1047+
LazyValueInfoImpl::solveBlockValueInsertElement(InsertElementInst *IEI,
1048+
BasicBlock *BB) {
1049+
// Query the value being inserted first; bail out if it is unavailable.
std::optional<ValueLatticeElement> OptEltVal =
1050+
getBlockValue(IEI->getOperand(1), BB, IEI);
1051+
if (!OptEltVal)
1052+
return std::nullopt;
1053+
// Res aliases the element's lattice value and is updated in place below.
ValueLatticeElement &Res = *OptEltVal;
1054+
1055+
// Query the vector being inserted into; bail out if it is unavailable.
std::optional<ValueLatticeElement> OptVecVal =
1056+
getBlockValue(IEI->getOperand(0), BB, IEI);
1057+
if (!OptVecVal)
1058+
return std::nullopt;
1059+
1060+
// Fold the source vector's lattice value into the element's value.
Res.mergeIn(*OptVecVal);
1061+
return Res;
1062+
}
1063+
10411064
std::optional<ValueLatticeElement>
10421065
LazyValueInfoImpl::solveBlockValueExtractValue(ExtractValueInst *EVI,
10431066
BasicBlock *BB) {

llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,3 +327,28 @@ join:
327327
%add = add <2 x i16> %phi, <i16 2, i16 3>
328328
ret <2 x i16> %add
329329
}
330+
331+
;; Check that the ICMP instruction is constant folded.
332+
define <2 x i1> @insertelement_fold1() {
333+
; CHECK-LABEL: define <2 x i1> @insertelement_fold1() {
334+
; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> poison, i32 10, i64 0
335+
; CHECK-NEXT: [[IE2:%.*]] = insertelement <2 x i32> [[IE1]], i32 20, i64 1
336+
; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
337+
;
338+
%ie1 = insertelement <2 x i32> poison, i32 10, i64 0
339+
%ie2 = insertelement <2 x i32> %ie1, i32 20, i64 1
340+
%icmp1 = icmp slt <2 x i32> %ie2, <i32 1024, i32 1024>
341+
ret <2 x i1> %icmp1
342+
}
343+
344+
;; Check if LVI is able to handle constant vector operands
345+
;; in InsertElementInst and CVP is able to fold ICMP instruction.
346+
define <2 x i1> @insertelement_fold2() {
347+
; CHECK-LABEL: define <2 x i1> @insertelement_fold2() {
348+
; CHECK-NEXT: [[IE1:%.*]] = insertelement <2 x i32> <i32 poison, i32 20>, i32 10, i64 0
349+
; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
350+
;
351+
%ie1 = insertelement <2 x i32> <i32 poison, i32 20>, i32 10, i64 0
352+
%icmp1 = icmp slt <2 x i32> %ie1, <i32 1024, i32 1024>
353+
ret <2 x i1> %icmp1
354+
}

0 commit comments

Comments
 (0)