Skip to content

Commit 97f6076

Browse files
authored
[VectorCombine][X86] Use updated getVectorInstrCost hook (#137823)
This addresses a TODO in scalarizeBinopOrCmp, which previously bailed out conservatively if one of the operands was a load. getVectorInstrCost was updated in https://reviews.llvm.org/D140498 to take in values, so we can now pass in the scalar value to be inserted, which should return an accurate cost for a gather. To prevent regressions on x86, this tries to constant fold NewVecC up front so that we can pass it into TTI and get a more accurate cost. We want to remove this restriction on RISC-V since this is always profitable whether or not the scalar is a load.
1 parent b5588ce commit 97f6076

File tree

7 files changed

+169
-93
lines changed

7 files changed

+169
-93
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/Statistic.h"
2020
#include "llvm/Analysis/AssumptionCache.h"
2121
#include "llvm/Analysis/BasicAliasAnalysis.h"
22+
#include "llvm/Analysis/ConstantFolding.h"
2223
#include "llvm/Analysis/GlobalsModRef.h"
2324
#include "llvm/Analysis/Loads.h"
2425
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -1080,14 +1081,6 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
10801081
VecTy1->getElementCount().getKnownMinValue() <= Index1)
10811082
return false;
10821083

1083-
// Bail for single insertion if it is a load.
1084-
// TODO: Handle this once getVectorInstrCost can cost for load/stores.
1085-
auto *I0 = dyn_cast_or_null<Instruction>(V0);
1086-
auto *I1 = dyn_cast_or_null<Instruction>(V1);
1087-
if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
1088-
(IsConst1 && I0 && I0->mayReadFromMemory()))
1089-
return false;
1090-
10911084
uint64_t Index = IsConst0 ? Index1 : Index0;
10921085
Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
10931086
Type *VecTy = I.getType();
@@ -1120,16 +1113,31 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
11201113
VectorOpCost = TTI.getIntrinsicInstrCost(VectorICA, CostKind);
11211114
}
11221115

1116+
// Fold the vector constants in the original vectors into a new base vector to
1117+
// get more accurate cost modelling.
1118+
Value *NewVecC;
1119+
if (isa<CmpInst>(I))
1120+
NewVecC = ConstantFoldCompareInstOperands(Pred, VecC0, VecC1, *DL);
1121+
else if (isa<BinaryOperator>(I))
1122+
NewVecC = ConstantFoldBinaryOpOperands((Instruction::BinaryOps)Opcode,
1123+
VecC0, VecC1, *DL);
1124+
else
1125+
NewVecC = ConstantFoldBinaryIntrinsic(
1126+
cast<IntrinsicInst>(I).getIntrinsicID(), VecC0, VecC1, I.getType(), &I);
1127+
11231128
// Get cost estimate for the insert element. This cost will factor into
11241129
// both sequences.
1125-
InstructionCost InsertCost = TTI.getVectorInstrCost(
1126-
Instruction::InsertElement, VecTy, CostKind, Index);
1127-
InstructionCost OldCost =
1128-
(IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
1129-
InstructionCost NewCost = ScalarOpCost + InsertCost +
1130-
(IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
1131-
(IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
1132-
1130+
InstructionCost InsertCostNewVecC = TTI.getVectorInstrCost(
1131+
Instruction::InsertElement, VecTy, CostKind, Index, NewVecC);
1132+
InstructionCost InsertCostV0 = TTI.getVectorInstrCost(
1133+
Instruction::InsertElement, VecTy, CostKind, Index, VecC0, V0);
1134+
InstructionCost InsertCostV1 = TTI.getVectorInstrCost(
1135+
Instruction::InsertElement, VecTy, CostKind, Index, VecC1, V1);
1136+
InstructionCost OldCost = (IsConst0 ? 0 : InsertCostV0) +
1137+
(IsConst1 ? 0 : InsertCostV1) + VectorOpCost;
1138+
InstructionCost NewCost = ScalarOpCost + InsertCostNewVecC +
1139+
(IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCostV0) +
1140+
(IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCostV1);
11331141
// We want to scalarize unless the vector variant actually has lower cost.
11341142
if (OldCost < NewCost || !NewCost.isValid())
11351143
return false;
@@ -1165,15 +1173,17 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
11651173
if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
11661174
ScalarInst->copyIRFlags(&I);
11671175

1168-
// Fold the vector constants in the original vectors into a new base vector.
1169-
Value *NewVecC;
1170-
if (isa<CmpInst>(I))
1171-
NewVecC = Builder.CreateCmp(Pred, VecC0, VecC1);
1172-
else if (isa<BinaryOperator>(I))
1173-
NewVecC = Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
1174-
else
1175-
NewVecC = Builder.CreateIntrinsic(
1176-
VecTy, cast<IntrinsicInst>(I).getIntrinsicID(), {VecC0, VecC1});
1176+
// Create a new base vector if the constant folding failed.
1177+
if (!NewVecC) {
1178+
if (isa<CmpInst>(I))
1179+
NewVecC = Builder.CreateCmp(Pred, VecC0, VecC1);
1180+
else if (isa<BinaryOperator>(I))
1181+
NewVecC =
1182+
Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
1183+
else
1184+
NewVecC = Builder.CreateIntrinsic(
1185+
VecTy, cast<IntrinsicInst>(I).getIntrinsicID(), {VecC0, VecC1});
1186+
}
11771187
Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
11781188
replaceValue(I, *Insert);
11791189
return true;
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -S -p vector-combine -mtriple=riscv64 -mattr=+v | FileCheck %s
3+
4+
define <4 x i32> @add_constant_load(ptr %p) {
5+
; CHECK-LABEL: define <4 x i32> @add_constant_load(
6+
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4
8+
; CHECK-NEXT: [[V_SCALAR:%.*]] = add i32 [[X]], 42
9+
; CHECK-NEXT: [[V:%.*]] = insertelement <4 x i32> poison, i32 [[V_SCALAR]], i64 0
10+
; CHECK-NEXT: ret <4 x i32> [[V]]
11+
;
12+
%x = load i32, ptr %p
13+
%ins = insertelement <4 x i32> poison, i32 %x, i32 0
14+
%v = add <4 x i32> %ins, splat (i32 42)
15+
ret <4 x i32> %v
16+
}

llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant-inseltpoison.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,8 @@ define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) {
153153
define <2 x i64> @shl_constant_op0_load(ptr %p) {
154154
; CHECK-LABEL: @shl_constant_op0_load(
155155
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
156-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 1
157-
; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> <i64 undef, i64 2>, [[INS]]
156+
; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[LD]]
157+
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
158158
; CHECK-NEXT: ret <2 x i64> [[BO]]
159159
;
160160
%ld = load i64, ptr %p
@@ -204,8 +204,8 @@ define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) {
204204
define <2 x i64> @shl_constant_op1_load(ptr %p) {
205205
; CHECK-LABEL: @shl_constant_op1_load(
206206
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
207-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
208-
; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], <i64 5, i64 2>
207+
; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[LD]], 5
208+
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 0
209209
; CHECK-NEXT: ret <2 x i64> [[BO]]
210210
;
211211
%ld = load i64, ptr %p

llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll

Lines changed: 66 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,8 @@ define <2 x i64> @shl_constant_op0_not_undef_lane(i64 %x) {
153153
define <2 x i64> @shl_constant_op0_load(ptr %p) {
154154
; CHECK-LABEL: @shl_constant_op0_load(
155155
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
156-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 1
157-
; CHECK-NEXT: [[BO:%.*]] = shl <2 x i64> <i64 undef, i64 2>, [[INS]]
156+
; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl i64 2, [[LD]]
157+
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> poison, i64 [[BO_SCALAR]], i64 1
158158
; CHECK-NEXT: ret <2 x i64> [[BO]]
159159
;
160160
%ld = load i64, ptr %p
@@ -204,8 +204,8 @@ define <2 x i64> @shl_constant_op1_not_undef_lane(i64 %x) {
204204
define <2 x i64> @shl_constant_op1_load(ptr %p) {
205205
; CHECK-LABEL: @shl_constant_op1_load(
206206
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
207-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0
208-
; CHECK-NEXT: [[BO:%.*]] = shl nuw <2 x i64> [[INS]], <i64 5, i64 2>
207+
; CHECK-NEXT: [[BO_SCALAR:%.*]] = shl nuw i64 [[LD]], 5
208+
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
209209
; CHECK-NEXT: ret <2 x i64> [[BO]]
210210
;
211211
%ld = load i64, ptr %p
@@ -479,21 +479,31 @@ define <2 x i64> @sdiv_constant_op1_not_undef_lane(i64 %x) {
479479
}
480480

481481
define <2 x i64> @and_constant(i64 %x) {
482-
; CHECK-LABEL: @and_constant(
483-
; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
484-
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[BO_SCALAR]], i64 0
485-
; CHECK-NEXT: ret <2 x i64> [[BO]]
482+
; SSE-LABEL: @and_constant(
483+
; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
484+
; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 undef>
485+
; SSE-NEXT: ret <2 x i64> [[BO]]
486+
;
487+
; AVX-LABEL: @and_constant(
488+
; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
489+
; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[BO_SCALAR]], i64 0
490+
; AVX-NEXT: ret <2 x i64> [[BO]]
486491
;
487492
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
488493
%bo = and <2 x i64> %ins, <i64 42, i64 undef>
489494
ret <2 x i64> %bo
490495
}
491496

492497
define <2 x i64> @and_constant_not_undef_lane(i64 %x) {
493-
; CHECK-LABEL: @and_constant_not_undef_lane(
494-
; CHECK-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
495-
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
496-
; CHECK-NEXT: ret <2 x i64> [[BO]]
498+
; SSE-LABEL: @and_constant_not_undef_lane(
499+
; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
500+
; SSE-NEXT: [[BO:%.*]] = and <2 x i64> [[INS]], <i64 42, i64 -42>
501+
; SSE-NEXT: ret <2 x i64> [[BO]]
502+
;
503+
; AVX-LABEL: @and_constant_not_undef_lane(
504+
; AVX-NEXT: [[BO_SCALAR:%.*]] = and i64 [[X:%.*]], 42
505+
; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BO_SCALAR]], i64 0
506+
; AVX-NEXT: ret <2 x i64> [[BO]]
497507
;
498508
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
499509
%bo = and <2 x i64> %ins, <i64 42, i64 -42>
@@ -523,10 +533,15 @@ define <2 x i64> @or_constant_not_undef_lane(i64 %x) {
523533
}
524534

525535
define <2 x i64> @xor_constant(i64 %x) {
526-
; CHECK-LABEL: @xor_constant(
527-
; CHECK-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
528-
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 0
529-
; CHECK-NEXT: ret <2 x i64> [[BO]]
536+
; SSE-LABEL: @xor_constant(
537+
; SSE-NEXT: [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
538+
; SSE-NEXT: [[BO:%.*]] = xor <2 x i64> [[INS]], <i64 42, i64 undef>
539+
; SSE-NEXT: ret <2 x i64> [[BO]]
540+
;
541+
; AVX-LABEL: @xor_constant(
542+
; AVX-NEXT: [[BO_SCALAR:%.*]] = xor i64 [[X:%.*]], 42
543+
; AVX-NEXT: [[BO:%.*]] = insertelement <2 x i64> <i64 undef, i64 0>, i64 [[BO_SCALAR]], i64 0
544+
; AVX-NEXT: ret <2 x i64> [[BO]]
530545
;
531546
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
532547
%bo = xor <2 x i64> %ins, <i64 42, i64 undef>
@@ -546,8 +561,8 @@ define <2 x i64> @xor_constant_not_undef_lane(i64 %x) {
546561

547562
define <2 x double> @fadd_constant(double %x) {
548563
; CHECK-LABEL: @fadd_constant(
549-
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fadd double [[X:%.*]], 4.200000e+01
550-
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
564+
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
565+
; CHECK-NEXT: [[BO:%.*]] = fadd <2 x double> [[INS]], <double 4.200000e+01, double undef>
551566
; CHECK-NEXT: ret <2 x double> [[BO]]
552567
;
553568
%ins = insertelement <2 x double> undef, double %x, i32 0
@@ -568,8 +583,8 @@ define <2 x double> @fadd_constant_not_undef_lane(double %x) {
568583

569584
define <2 x double> @fsub_constant_op0(double %x) {
570585
; CHECK-LABEL: @fsub_constant_op0(
571-
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub fast double 4.200000e+01, [[X:%.*]]
572-
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
586+
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
587+
; CHECK-NEXT: [[BO:%.*]] = fsub fast <2 x double> <double 4.200000e+01, double undef>, [[INS]]
573588
; CHECK-NEXT: ret <2 x double> [[BO]]
574589
;
575590
%ins = insertelement <2 x double> undef, double %x, i32 0
@@ -601,8 +616,8 @@ define <2 x double> @fsub_constant_op1(double %x) {
601616

602617
define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) {
603618
; CHECK-LABEL: @fsub_constant_op1_not_undef_lane(
604-
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fsub double [[X:%.*]], 4.200000e+01
605-
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
619+
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
620+
; CHECK-NEXT: [[BO:%.*]] = fsub <2 x double> [[INS]], <double 4.200000e+01, double -4.200000e+01>
606621
; CHECK-NEXT: ret <2 x double> [[BO]]
607622
;
608623
%ins = insertelement <2 x double> undef, double %x, i32 0
@@ -612,8 +627,8 @@ define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) {
612627

613628
define <2 x double> @fmul_constant(double %x) {
614629
; CHECK-LABEL: @fmul_constant(
615-
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fmul reassoc double [[X:%.*]], 4.200000e+01
616-
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
630+
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
631+
; CHECK-NEXT: [[BO:%.*]] = fmul reassoc <2 x double> [[INS]], <double 4.200000e+01, double undef>
617632
; CHECK-NEXT: ret <2 x double> [[BO]]
618633
;
619634
%ins = insertelement <2 x double> undef, double %x, i32 0
@@ -644,32 +659,47 @@ define <2 x double> @fdiv_constant_op0(double %x) {
644659
}
645660

646661
define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) {
647-
; CHECK-LABEL: @fdiv_constant_op0_not_undef_lane(
648-
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]]
649-
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
650-
; CHECK-NEXT: ret <2 x double> [[BO]]
662+
; SSE-LABEL: @fdiv_constant_op0_not_undef_lane(
663+
; SSE-NEXT: [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]]
664+
; SSE-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
665+
; SSE-NEXT: ret <2 x double> [[BO]]
666+
;
667+
; AVX-LABEL: @fdiv_constant_op0_not_undef_lane(
668+
; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
669+
; AVX-NEXT: [[BO:%.*]] = fdiv ninf <2 x double> <double 4.200000e+01, double -4.200000e+01>, [[INS]]
670+
; AVX-NEXT: ret <2 x double> [[BO]]
651671
;
652672
%ins = insertelement <2 x double> undef, double %x, i32 0
653673
%bo = fdiv ninf <2 x double> <double 42.0, double -42.0>, %ins
654674
ret <2 x double> %bo
655675
}
656676

657677
define <2 x double> @fdiv_constant_op1(double %x) {
658-
; CHECK-LABEL: @fdiv_constant_op1(
659-
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
660-
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
661-
; CHECK-NEXT: ret <2 x double> [[BO]]
678+
; SSE-LABEL: @fdiv_constant_op1(
679+
; SSE-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
680+
; SSE-NEXT: [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double undef>, double [[BO_SCALAR]], i64 0
681+
; SSE-NEXT: ret <2 x double> [[BO]]
682+
;
683+
; AVX-LABEL: @fdiv_constant_op1(
684+
; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
685+
; AVX-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], <double 4.200000e+01, double undef>
686+
; AVX-NEXT: ret <2 x double> [[BO]]
662687
;
663688
%ins = insertelement <2 x double> undef, double %x, i32 0
664689
%bo = fdiv <2 x double> %ins, <double 42.0, double undef>
665690
ret <2 x double> %bo
666691
}
667692

668693
define <2 x double> @fdiv_constant_op1_not_undef_lane(double %x) {
669-
; CHECK-LABEL: @fdiv_constant_op1_not_undef_lane(
670-
; CHECK-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
671-
; CHECK-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
672-
; CHECK-NEXT: ret <2 x double> [[BO]]
694+
; SSE-LABEL: @fdiv_constant_op1_not_undef_lane(
695+
; SSE-NEXT: [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
696+
; SSE-NEXT: [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
697+
; SSE-NEXT: ret <2 x double> [[BO]]
698+
;
699+
; AVX-LABEL: @fdiv_constant_op1_not_undef_lane(
700+
; AVX-NEXT: [[INS:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
701+
; AVX-NEXT: [[BO:%.*]] = fdiv <2 x double> [[INS]], <double 4.200000e+01, double -4.200000e+01>
702+
; AVX-NEXT: ret <2 x double> [[BO]]
673703
;
674704
%ins = insertelement <2 x double> undef, double %x, i32 0
675705
%bo = fdiv <2 x double> %ins, <double 42.0, double -42.0>

llvm/test/Transforms/VectorCombine/X86/insert-binop.ll

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -155,12 +155,19 @@ define <2 x i64> @ins1_ins1_urem(i64 %x, i64 %y) {
155155
; Extra use is accounted for in cost calculation.
156156

157157
define <4 x i32> @ins0_ins0_xor(i32 %x, i32 %y) {
158-
; CHECK-LABEL: @ins0_ins0_xor(
159-
; CHECK-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
160-
; CHECK-NEXT: call void @use(<4 x i32> [[I0]])
161-
; CHECK-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
162-
; CHECK-NEXT: [[R:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[R_SCALAR]], i64 0
163-
; CHECK-NEXT: ret <4 x i32> [[R]]
158+
; SSE-LABEL: @ins0_ins0_xor(
159+
; SSE-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
160+
; SSE-NEXT: call void @use(<4 x i32> [[I0]])
161+
; SSE-NEXT: [[I1:%.*]] = insertelement <4 x i32> undef, i32 [[Y:%.*]], i32 0
162+
; SSE-NEXT: [[R:%.*]] = xor <4 x i32> [[I0]], [[I1]]
163+
; SSE-NEXT: ret <4 x i32> [[R]]
164+
;
165+
; AVX-LABEL: @ins0_ins0_xor(
166+
; AVX-NEXT: [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
167+
; AVX-NEXT: call void @use(<4 x i32> [[I0]])
168+
; AVX-NEXT: [[R_SCALAR:%.*]] = xor i32 [[X]], [[Y:%.*]]
169+
; AVX-NEXT: [[R:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[R_SCALAR]], i64 0
170+
; AVX-NEXT: ret <4 x i32> [[R]]
164171
;
165172
%i0 = insertelement <4 x i32> undef, i32 %x, i32 0
166173
call void @use(<4 x i32> %i0)

llvm/test/Transforms/VectorCombine/X86/scalarize-cmp-inseltpoison.ll

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
3-
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
2+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
3+
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX
44

55
declare void @use(<4 x i32>)
66
declare void @usef(<4 x float>)
@@ -165,14 +165,18 @@ define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {
165165
ret <2 x i1> %r
166166
}
167167

168-
; negative test - load prevents the transform
169-
170168
define <2 x i1> @constant_op1_i64_load(ptr %p) {
171-
; CHECK-LABEL: @constant_op1_i64_load(
172-
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
173-
; CHECK-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
174-
; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
175-
; CHECK-NEXT: ret <2 x i1> [[R]]
169+
; SSE-LABEL: @constant_op1_i64_load(
170+
; SSE-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
171+
; SSE-NEXT: [[R_SCALAR:%.*]] = icmp eq i64 [[LD]], 42
172+
; SSE-NEXT: [[R:%.*]] = insertelement <2 x i1> poison, i1 [[R_SCALAR]], i64 0
173+
; SSE-NEXT: ret <2 x i1> [[R]]
174+
;
175+
; AVX-LABEL: @constant_op1_i64_load(
176+
; AVX-NEXT: [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
177+
; AVX-NEXT: [[INS:%.*]] = insertelement <2 x i64> poison, i64 [[LD]], i32 0
178+
; AVX-NEXT: [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
179+
; AVX-NEXT: ret <2 x i1> [[R]]
176180
;
177181
%ld = load i64, ptr %p
178182
%ins = insertelement <2 x i64> poison, i64 %ld, i32 0

0 commit comments

Comments
 (0)