Skip to content

Commit d942f5e

Browse files
authored
[VectorCombine] Combine extract/insert from vector into a shuffle (#115213)
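insert (DstVec, (extract SrcVec, ExtIdx), InsIdx) --> shuffle (DstVec, SrcVec, Mask). This commit folds an extractelement from one vector followed by an insertelement into another vector of the same type (both with constant indices) into a single two-source shufflevector.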
1 parent 67b81e2 commit d942f5e

File tree

6 files changed

+138
-56
lines changed

6 files changed

+138
-56
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class VectorCombine {
106106
Instruction &I);
107107
bool foldExtractExtract(Instruction &I);
108108
bool foldInsExtFNeg(Instruction &I);
109+
bool foldInsExtVectorToShuffle(Instruction &I);
109110
bool foldBitcastShuffle(Instruction &I);
110111
bool scalarizeBinopOrCmp(Instruction &I);
111112
bool scalarizeVPIntrinsic(Instruction &I);
@@ -2787,6 +2788,51 @@ bool VectorCombine::shrinkType(llvm::Instruction &I) {
27872788
return true;
27882789
}
27892790

2791+
/// insert (DstVec, (extract SrcVec, ExtIdx), InsIdx) -->
2792+
/// shuffle (DstVec, SrcVec, Mask)
2793+
bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
2794+
Value *DstVec, *SrcVec;
2795+
uint64_t ExtIdx, InsIdx;
2796+
if (!match(&I,
2797+
m_InsertElt(m_Value(DstVec),
2798+
m_ExtractElt(m_Value(SrcVec), m_ConstantInt(ExtIdx)),
2799+
m_ConstantInt(InsIdx))))
2800+
return false;
2801+
2802+
auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
2803+
if (!VecTy || SrcVec->getType() != VecTy)
2804+
return false;
2805+
2806+
unsigned NumElts = VecTy->getNumElements();
2807+
if (ExtIdx >= NumElts || InsIdx >= NumElts)
2808+
return false;
2809+
2810+
SmallVector<int> Mask(NumElts, 0);
2811+
std::iota(Mask.begin(), Mask.end(), 0);
2812+
Mask[InsIdx] = ExtIdx + NumElts;
2813+
// Cost
2814+
auto *Ins = cast<InsertElementInst>(&I);
2815+
auto *Ext = cast<ExtractElementInst>(I.getOperand(1));
2816+
2817+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2818+
InstructionCost OldCost =
2819+
TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx) +
2820+
TTI.getVectorInstrCost(*Ins, VecTy, CostKind, InsIdx);
2821+
2822+
InstructionCost NewCost =
2823+
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, VecTy, Mask);
2824+
if (!Ext->hasOneUse())
2825+
NewCost += TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx);
2826+
2827+
if (OldCost < NewCost)
2828+
return false;
2829+
2830+
Value *Shuf = Builder.CreateShuffleVector(DstVec, SrcVec, Mask);
2831+
replaceValue(I, *Shuf);
2832+
2833+
return true;
2834+
}
2835+
27902836
/// This is the entry point for all transforms. Pass manager differences are
27912837
/// handled in the callers of this function.
27922838
bool VectorCombine::run() {
@@ -2843,6 +2889,7 @@ bool VectorCombine::run() {
28432889
switch (Opcode) {
28442890
case Instruction::InsertElement:
28452891
MadeChange |= foldInsExtFNeg(I);
2892+
MadeChange |= foldInsExtVectorToShuffle(I);
28462893
break;
28472894
case Instruction::ShuffleVector:
28482895
MadeChange |= foldPermuteOfBinops(I);

llvm/test/Transforms/VectorCombine/X86/extract-binop-inseltpoison.ll

Lines changed: 44 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -420,8 +420,7 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
420420
; CHECK-LABEL: @ins_bo_ext_ext(
421421
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422422
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
423+
; CHECK-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
425424
; CHECK-NEXT: ret <4 x float> [[V3]]
426425
;
427426
%a2 = extractelement <4 x float> %a, i32 2
@@ -435,13 +434,21 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
435434
; but it is likely that extracting from index 3 is the better option.
436435

437436
define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
438-
; CHECK-LABEL: @ins_bo_ext_ext_uses(
439-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
440-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
441-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
442-
; CHECK-NEXT: call void @use_f32(float [[A23]])
443-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
444-
; CHECK-NEXT: ret <4 x float> [[V3]]
437+
; SSE-LABEL: @ins_bo_ext_ext_uses(
438+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
439+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
440+
; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
441+
; SSE-NEXT: call void @use_f32(float [[A23]])
442+
; SSE-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
443+
; SSE-NEXT: ret <4 x float> [[V3]]
444+
;
445+
; AVX-LABEL: @ins_bo_ext_ext_uses(
446+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
447+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
448+
; AVX-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
449+
; AVX-NEXT: call void @use_f32(float [[A23]])
450+
; AVX-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
451+
; AVX-NEXT: ret <4 x float> [[V3]]
445452
;
446453
%a2 = extractelement <4 x float> %a, i32 2
447454
%a3 = extractelement <4 x float> %a, i32 3
@@ -452,22 +459,34 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
452459
}
453460

454461
define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
455-
; CHECK-LABEL: @PR34724(
456-
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
457-
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
458-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
459-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
460-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
461-
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
462-
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
463-
; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
464-
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
465-
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
466-
; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
467-
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> poison, float [[A23]], i32 1
468-
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
469-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
470-
; CHECK-NEXT: ret <4 x float> [[V3]]
462+
; SSE-LABEL: @PR34724(
463+
; SSE-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
464+
; SSE-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
465+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
466+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
467+
; SSE-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
468+
; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
469+
; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
470+
; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
471+
; SSE-NEXT: [[V1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
472+
; SSE-NEXT: [[V2:%.*]] = shufflevector <4 x float> [[V1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
473+
; SSE-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
474+
; SSE-NEXT: ret <4 x float> [[V3]]
475+
;
476+
; AVX-LABEL: @PR34724(
477+
; AVX-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
478+
; AVX-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
479+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
480+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
481+
; AVX-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
482+
; AVX-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
483+
; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
484+
; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
485+
; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
486+
; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
487+
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
488+
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
489+
; AVX-NEXT: ret <4 x float> [[V3]]
471490
;
472491
%a0 = extractelement <4 x float> %a, i32 0
473492
%a1 = extractelement <4 x float> %a, i32 1

llvm/test/Transforms/VectorCombine/X86/extract-binop.ll

Lines changed: 44 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -420,8 +420,7 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
420420
; CHECK-LABEL: @ins_bo_ext_ext(
421421
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
422422
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
423-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i64 3
424-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
423+
; CHECK-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
425424
; CHECK-NEXT: ret <4 x float> [[V3]]
426425
;
427426
%a2 = extractelement <4 x float> %a, i32 2
@@ -435,13 +434,21 @@ define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
435434
; but it is likely that extracting from index 3 is the better option.
436435

437436
define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
438-
; CHECK-LABEL: @ins_bo_ext_ext_uses(
439-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
440-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
441-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
442-
; CHECK-NEXT: call void @use_f32(float [[A23]])
443-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
444-
; CHECK-NEXT: ret <4 x float> [[V3]]
437+
; SSE-LABEL: @ins_bo_ext_ext_uses(
438+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
439+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
440+
; SSE-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
441+
; SSE-NEXT: call void @use_f32(float [[A23]])
442+
; SSE-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
443+
; SSE-NEXT: ret <4 x float> [[V3]]
444+
;
445+
; AVX-LABEL: @ins_bo_ext_ext_uses(
446+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
447+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
448+
; AVX-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
449+
; AVX-NEXT: call void @use_f32(float [[A23]])
450+
; AVX-NEXT: [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
451+
; AVX-NEXT: ret <4 x float> [[V3]]
445452
;
446453
%a2 = extractelement <4 x float> %a, i32 2
447454
%a3 = extractelement <4 x float> %a, i32 3
@@ -452,22 +459,34 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
452459
}
453460

454461
define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
455-
; CHECK-LABEL: @PR34724(
456-
; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
457-
; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
458-
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
459-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
460-
; CHECK-NEXT: [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
461-
; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
462-
; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
463-
; CHECK-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
464-
; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
465-
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
466-
; CHECK-NEXT: [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
467-
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> undef, float [[A23]], i32 1
468-
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
469-
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[B23]], i32 3
470-
; CHECK-NEXT: ret <4 x float> [[V3]]
462+
; SSE-LABEL: @PR34724(
463+
; SSE-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
464+
; SSE-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
465+
; SSE-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
466+
; SSE-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
467+
; SSE-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
468+
; SSE-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
469+
; SSE-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
470+
; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
471+
; SSE-NEXT: [[V1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
472+
; SSE-NEXT: [[V2:%.*]] = shufflevector <4 x float> [[V1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
473+
; SSE-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
474+
; SSE-NEXT: ret <4 x float> [[V3]]
475+
;
476+
; AVX-LABEL: @PR34724(
477+
; AVX-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
478+
; AVX-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
479+
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
480+
; AVX-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
481+
; AVX-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
482+
; AVX-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
483+
; AVX-NEXT: [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
484+
; AVX-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
485+
; AVX-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
486+
; AVX-NEXT: [[V1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 6, i32 2, i32 3>
487+
; AVX-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[B01]], i32 2
488+
; AVX-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
489+
; AVX-NEXT: ret <4 x float> [[V3]]
471490
;
472491
%a0 = extractelement <4 x float> %a, i32 0
473492
%a1 = extractelement <4 x float> %a, i32 1

llvm/test/Transforms/VectorCombine/X86/extract-cmp.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,7 @@ define <4 x i1> @ins_fcmp_ext_ext(<4 x float> %a, <4 x i1> %b) {
163163
; AVX-LABEL: @ins_fcmp_ext_ext(
164164
; AVX-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
165165
; AVX-NEXT: [[TMP1:%.*]] = fcmp ugt <4 x float> [[A]], [[SHIFT]]
166-
; AVX-NEXT: [[A21:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
167-
; AVX-NEXT: [[R:%.*]] = insertelement <4 x i1> [[B:%.*]], i1 [[A21]], i32 2
166+
; AVX-NEXT: [[R:%.*]] = shufflevector <4 x i1> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
168167
; AVX-NEXT: ret <4 x i1> [[R]]
169168
;
170169
%a1 = extractelement <4 x float> %a, i32 1

llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -546,8 +546,7 @@ define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr noc
546546
; CHECK-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]]
547547
; CHECK-NEXT: [[T4:%.*]] = extractelement <2 x float> [[T3]], i32 0
548548
; CHECK-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> poison, float [[T4]], i32 0
549-
; CHECK-NEXT: [[T5:%.*]] = extractelement <2 x float> [[T3]], i32 1
550-
; CHECK-NEXT: [[RESULT1:%.*]] = insertelement <2 x float> [[RESULT0]], float [[T5]], i32 1
549+
; CHECK-NEXT: [[RESULT1:%.*]] = shufflevector <2 x float> [[RESULT0]], <2 x float> [[T3]], <2 x i32> <i32 0, i32 3>
551550
; CHECK-NEXT: store <2 x float> [[RESULT1]], ptr [[RESULTPTR:%.*]], align 8
552551
; CHECK-NEXT: ret void
553552
;

llvm/test/Transforms/VectorCombine/X86/load.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -529,8 +529,7 @@ define void @PR47558_multiple_use_load(ptr nocapture nonnull %resultptr, ptr noc
529529
; CHECK-NEXT: [[T3:%.*]] = fmul <2 x float> [[OP]], [[T2]]
530530
; CHECK-NEXT: [[T4:%.*]] = extractelement <2 x float> [[T3]], i32 0
531531
; CHECK-NEXT: [[RESULT0:%.*]] = insertelement <2 x float> undef, float [[T4]], i32 0
532-
; CHECK-NEXT: [[T5:%.*]] = extractelement <2 x float> [[T3]], i32 1
533-
; CHECK-NEXT: [[RESULT1:%.*]] = insertelement <2 x float> [[RESULT0]], float [[T5]], i32 1
532+
; CHECK-NEXT: [[RESULT1:%.*]] = shufflevector <2 x float> [[RESULT0]], <2 x float> [[T3]], <2 x i32> <i32 0, i32 3>
534533
; CHECK-NEXT: store <2 x float> [[RESULT1]], ptr [[RESULTPTR:%.*]], align 8
535534
; CHECK-NEXT: ret void
536535
;

0 commit comments

Comments
 (0)