Skip to content

Commit e5a32d7

Browse files
committed
[InstCombine] move extend after insertelement if both operands are extended
I was wondering how instcombine does on the examples in D109236, and we're missing a basic transform:

    inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)

Proof: https://alive2.llvm.org/ce/z/z2aBu9

Note that there are several possible extensions of this fold (see TODO comments).

Differential Revision: https://reviews.llvm.org/D109537
1 parent 9bdb19c commit e5a32d7

File tree

2 files changed

+56
-11
lines changed

2 files changed

+56
-11
lines changed

llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1407,6 +1407,41 @@ static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
14071407
return nullptr;
14081408
}
14091409

1410+
/// If both the base vector and the inserted element are extended from the same
1411+
/// type, do the insert element in the narrow source type followed by extend.
1412+
/// TODO: This can be extended to include other cast opcodes, but particularly
1413+
/// if we create a wider insertelement, make sure codegen is not harmed.
1414+
static Instruction *narrowInsElt(InsertElementInst &InsElt,
1415+
InstCombiner::BuilderTy &Builder) {
1416+
// We are creating a vector extend. If the original vector extend has another
1417+
// use, that would mean we end up with 2 vector extends, so avoid that.
1418+
// TODO: We could ease the use-clause to "if at least one op has one use"
1419+
// (assuming that the source types match - see next TODO comment).
1420+
Value *Vec = InsElt.getOperand(0);
1421+
if (!Vec->hasOneUse())
1422+
return nullptr;
1423+
1424+
Value *Scalar = InsElt.getOperand(1);
1425+
Value *X, *Y;
1426+
CastInst::CastOps CastOpcode;
1427+
if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1428+
CastOpcode = Instruction::FPExt;
1429+
else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1430+
CastOpcode = Instruction::SExt;
1431+
else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1432+
CastOpcode = Instruction::ZExt;
1433+
else
1434+
return nullptr;
1435+
1436+
// TODO: We can allow mismatched types by creating an intermediate cast.
1437+
if (X->getType()->getScalarType() != Y->getType())
1438+
return nullptr;
1439+
1440+
// inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1441+
Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1442+
return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1443+
}
1444+
14101445
Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
14111446
Value *VecOp = IE.getOperand(0);
14121447
Value *ScalarOp = IE.getOperand(1);
@@ -1526,6 +1561,9 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
15261561
if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
15271562
return IdentityShuf;
15281563

1564+
if (Instruction *Ext = narrowInsElt(IE, Builder))
1565+
return Ext;
1566+
15291567
return nullptr;
15301568
}
15311569

llvm/test/Transforms/InstCombine/insert-ext.ll

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,8 @@ declare void @usevec(<2 x i32>)
66

77
define <2 x double> @fpext_fpext(<2 x half> %x, half %y, i32 %index) {
88
; CHECK-LABEL: @fpext_fpext(
9-
; CHECK-NEXT: [[V:%.*]] = fpext <2 x half> [[X:%.*]] to <2 x double>
10-
; CHECK-NEXT: [[S:%.*]] = fpext half [[Y:%.*]] to double
11-
; CHECK-NEXT: [[I:%.*]] = insertelement <2 x double> [[V]], double [[S]], i32 [[INDEX:%.*]]
9+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[X:%.*]], half [[Y:%.*]], i32 [[INDEX:%.*]]
10+
; CHECK-NEXT: [[I:%.*]] = fpext <2 x half> [[TMP1]] to <2 x double>
1211
; CHECK-NEXT: ret <2 x double> [[I]]
1312
;
1413
%v = fpext <2 x half> %x to <2 x double>
@@ -19,9 +18,8 @@ define <2 x double> @fpext_fpext(<2 x half> %x, half %y, i32 %index) {
1918

2019
define <2 x i32> @sext_sext(<2 x i8> %x, i8 %y, i32 %index) {
2120
; CHECK-LABEL: @sext_sext(
22-
; CHECK-NEXT: [[V:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32>
23-
; CHECK-NEXT: [[S:%.*]] = sext i8 [[Y:%.*]] to i32
24-
; CHECK-NEXT: [[I:%.*]] = insertelement <2 x i32> [[V]], i32 [[S]], i32 [[INDEX:%.*]]
21+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[X:%.*]], i8 [[Y:%.*]], i32 [[INDEX:%.*]]
22+
; CHECK-NEXT: [[I:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
2523
; CHECK-NEXT: ret <2 x i32> [[I]]
2624
;
2725
%v = sext <2 x i8> %x to <2 x i32>
@@ -32,9 +30,8 @@ define <2 x i32> @sext_sext(<2 x i8> %x, i8 %y, i32 %index) {
3230

3331
define <2 x i12> @zext_zext(<2 x i8> %x, i8 %y, i32 %index) {
3432
; CHECK-LABEL: @zext_zext(
35-
; CHECK-NEXT: [[V:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i12>
36-
; CHECK-NEXT: [[S:%.*]] = zext i8 [[Y:%.*]] to i12
37-
; CHECK-NEXT: [[I:%.*]] = insertelement <2 x i12> [[V]], i12 [[S]], i32 [[INDEX:%.*]]
33+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[X:%.*]], i8 [[Y:%.*]], i32 [[INDEX:%.*]]
34+
; CHECK-NEXT: [[I:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i12>
3835
; CHECK-NEXT: ret <2 x i12> [[I]]
3936
;
4037
%v = zext <2 x i8> %x to <2 x i12>
@@ -43,6 +40,8 @@ define <2 x i12> @zext_zext(<2 x i8> %x, i8 %y, i32 %index) {
4340
ret <2 x i12> %i
4441
}
4542

43+
; negative test - need same source type
44+
4645
define <2 x double> @fpext_fpext_types(<2 x half> %x, float %y, i32 %index) {
4746
; CHECK-LABEL: @fpext_fpext_types(
4847
; CHECK-NEXT: [[V:%.*]] = fpext <2 x half> [[X:%.*]] to <2 x double>
@@ -56,6 +55,8 @@ define <2 x double> @fpext_fpext_types(<2 x half> %x, float %y, i32 %index) {
5655
ret <2 x double> %i
5756
}
5857

58+
; negative test - need same source type
59+
5960
define <2 x i32> @sext_sext_types(<2 x i16> %x, i8 %y, i32 %index) {
6061
; CHECK-LABEL: @sext_sext_types(
6162
; CHECK-NEXT: [[V:%.*]] = sext <2 x i16> [[X:%.*]] to <2 x i32>
@@ -69,6 +70,8 @@ define <2 x i32> @sext_sext_types(<2 x i16> %x, i8 %y, i32 %index) {
6970
ret <2 x i32> %i
7071
}
7172

73+
; negative test - need same extend opcode
74+
7275
define <2 x i12> @sext_zext(<2 x i8> %x, i8 %y, i32 %index) {
7376
; CHECK-LABEL: @sext_zext(
7477
; CHECK-NEXT: [[V:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i12>
@@ -82,6 +85,8 @@ define <2 x i12> @sext_zext(<2 x i8> %x, i8 %y, i32 %index) {
8285
ret <2 x i12> %i
8386
}
8487

88+
; negative test - don't trade scalar extend for vector extend
89+
8590
define <2 x i32> @sext_sext_use1(<2 x i8> %x, i8 %y, i32 %index) {
8691
; CHECK-LABEL: @sext_sext_use1(
8792
; CHECK-NEXT: [[V:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32>
@@ -99,10 +104,10 @@ define <2 x i32> @sext_sext_use1(<2 x i8> %x, i8 %y, i32 %index) {
99104

100105
define <2 x i32> @zext_zext_use2(<2 x i8> %x, i8 %y, i32 %index) {
101106
; CHECK-LABEL: @zext_zext_use2(
102-
; CHECK-NEXT: [[V:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
103107
; CHECK-NEXT: [[S:%.*]] = zext i8 [[Y:%.*]] to i32
104108
; CHECK-NEXT: call void @use(i32 [[S]])
105-
; CHECK-NEXT: [[I:%.*]] = insertelement <2 x i32> [[V]], i32 [[S]], i32 [[INDEX:%.*]]
109+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> [[X:%.*]], i8 [[Y]], i32 [[INDEX:%.*]]
110+
; CHECK-NEXT: [[I:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i32>
106111
; CHECK-NEXT: ret <2 x i32> [[I]]
107112
;
108113
%v = zext <2 x i8> %x to <2 x i32>
@@ -112,6 +117,8 @@ define <2 x i32> @zext_zext_use2(<2 x i8> %x, i8 %y, i32 %index) {
112117
ret <2 x i32> %i
113118
}
114119

120+
; negative test - don't create an extra extend
121+
115122
define <2 x i32> @zext_zext_use3(<2 x i8> %x, i8 %y, i32 %index) {
116123
; CHECK-LABEL: @zext_zext_use3(
117124
; CHECK-NEXT: [[V:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>

0 commit comments

Comments
 (0)