Skip to content

Commit 0394c21

Browse files
committed
[AArch64] Don't model legal subvector insert/extract as scalarization
1 parent d25b58f commit 0394c21

File tree

2 files changed: 44 additions and 14 deletions.

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -568,6 +568,32 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
568568
}
569569
return Cost;
570570
}
571+
case Intrinsic::vector_extract: {
572+
// If both the vector argument and the return type are legal types, then
573+
// this should be a no-op or simple operation; return a relatively low cost.
574+
LLVMContext &C = RetTy->getContext();
575+
EVT MRTy = getTLI()->getValueType(DL, RetTy);
576+
EVT MPTy = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
577+
TargetLoweringBase::LegalizeKind RLK = getTLI()->getTypeConversion(C, MRTy);
578+
TargetLoweringBase::LegalizeKind PLK = getTLI()->getTypeConversion(C, MPTy);
579+
if (RLK.first == TargetLoweringBase::TypeLegal &&
580+
PLK.first == TargetLoweringBase::TypeLegal)
581+
return InstructionCost(1);
582+
break;
583+
}
584+
case Intrinsic::vector_insert: {
585+
// If both the vector and subvector arguments are legal types, then this
586+
// should be a no-op or simple operation; return a relatively low cost.
587+
LLVMContext &C = RetTy->getContext();
588+
EVT MTy0 = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
589+
EVT MTy1 = getTLI()->getValueType(DL, ICA.getArgTypes()[1]);
590+
TargetLoweringBase::LegalizeKind LK0 = getTLI()->getTypeConversion(C, MTy0);
591+
TargetLoweringBase::LegalizeKind LK1 = getTLI()->getTypeConversion(C, MTy1);
592+
if (LK0.first == TargetLoweringBase::TypeLegal &&
593+
LK1.first == TargetLoweringBase::TypeLegal)
594+
return InstructionCost(1);
595+
break;
596+
}
571597
case Intrinsic::bitreverse: {
572598
static const CostTblEntry BitreverseTbl[] = {
573599
{Intrinsic::bitreverse, MVT::i32, 1},

llvm/test/Transforms/LoopUnroll/AArch64/scalable-vec-ins-ext.ll

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -38,26 +38,30 @@ define void @test_ins_ext_cost(ptr readonly %a, ptr readonly %b, ptr readonly %c
3838
; UNROLL-256-LABEL: define void @test_ins_ext_cost(
3939
; UNROLL-256-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]], ptr readonly [[C:%.*]], ptr noalias [[D:%.*]]) #[[ATTR0:[0-9]+]] {
4040
; UNROLL-256-NEXT: entry:
41-
; UNROLL-256-NEXT: br label [[FOR_BODY:%.*]]
42-
; UNROLL-256: for.body:
43-
; UNROLL-256-NEXT: [[EXIT_COND:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ false, [[FOR_BODY]] ]
44-
; UNROLL-256-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 1, [[FOR_BODY]] ]
45-
; UNROLL-256-NEXT: [[GEP_A:%.*]] = getelementptr inbounds <8 x float>, ptr [[A]], i64 [[IV]]
46-
; UNROLL-256-NEXT: [[LOAD_A:%.*]] = load <8 x float>, ptr [[GEP_A]], align 16
47-
; UNROLL-256-NEXT: [[GEP_B:%.*]] = getelementptr inbounds <8 x float>, ptr [[B]], i64 [[IV]]
48-
; UNROLL-256-NEXT: [[LOAD_B:%.*]] = load <8 x float>, ptr [[GEP_B]], align 16
49-
; UNROLL-256-NEXT: [[GEP_C:%.*]] = getelementptr inbounds <8 x float>, ptr [[C]], i64 [[IV]]
50-
; UNROLL-256-NEXT: [[LOAD_C:%.*]] = load <8 x float>, ptr [[GEP_C]], align 16
41+
; UNROLL-256-NEXT: [[LOAD_A:%.*]] = load <8 x float>, ptr [[A]], align 16
42+
; UNROLL-256-NEXT: [[LOAD_B:%.*]] = load <8 x float>, ptr [[B]], align 16
43+
; UNROLL-256-NEXT: [[LOAD_C:%.*]] = load <8 x float>, ptr [[C]], align 16
5144
; UNROLL-256-NEXT: [[CAST_SCALABLE_B:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_B]], i64 0)
5245
; UNROLL-256-NEXT: [[CAST_SCALABLE_C:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_C]], i64 0)
5346
; UNROLL-256-NEXT: [[ADD:%.*]] = fadd <vscale x 4 x float> [[CAST_SCALABLE_B]], [[CAST_SCALABLE_C]]
5447
; UNROLL-256-NEXT: [[CAST_SCALABLE_A:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_A]], i64 0)
5548
; UNROLL-256-NEXT: [[MUL:%.*]] = fmul <vscale x 4 x float> [[CAST_SCALABLE_A]], [[ADD]]
5649
; UNROLL-256-NEXT: [[CAST_FIXED_D:%.*]] = tail call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[MUL]], i64 0)
57-
; UNROLL-256-NEXT: [[GEP_D:%.*]] = getelementptr inbounds <8 x float>, ptr [[D]], i64 0, i64 [[IV]]
58-
; UNROLL-256-NEXT: store <8 x float> [[CAST_FIXED_D]], ptr [[GEP_D]], align 16
59-
; UNROLL-256-NEXT: br i1 [[EXIT_COND]], label [[FOR_BODY]], label [[EXIT:%.*]]
60-
; UNROLL-256: exit:
50+
; UNROLL-256-NEXT: store <8 x float> [[CAST_FIXED_D]], ptr [[D]], align 16
51+
; UNROLL-256-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds <8 x float>, ptr [[A]], i64 1
52+
; UNROLL-256-NEXT: [[LOAD_A_1:%.*]] = load <8 x float>, ptr [[GEP_A_1]], align 16
53+
; UNROLL-256-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds <8 x float>, ptr [[B]], i64 1
54+
; UNROLL-256-NEXT: [[LOAD_B_1:%.*]] = load <8 x float>, ptr [[GEP_B_1]], align 16
55+
; UNROLL-256-NEXT: [[GEP_C_1:%.*]] = getelementptr inbounds <8 x float>, ptr [[C]], i64 1
56+
; UNROLL-256-NEXT: [[LOAD_C_1:%.*]] = load <8 x float>, ptr [[GEP_C_1]], align 16
57+
; UNROLL-256-NEXT: [[CAST_SCALABLE_B_1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_B_1]], i64 0)
58+
; UNROLL-256-NEXT: [[CAST_SCALABLE_C_1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_C_1]], i64 0)
59+
; UNROLL-256-NEXT: [[ADD_1:%.*]] = fadd <vscale x 4 x float> [[CAST_SCALABLE_B_1]], [[CAST_SCALABLE_C_1]]
60+
; UNROLL-256-NEXT: [[CAST_SCALABLE_A_1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_A_1]], i64 0)
61+
; UNROLL-256-NEXT: [[MUL_1:%.*]] = fmul <vscale x 4 x float> [[CAST_SCALABLE_A_1]], [[ADD_1]]
62+
; UNROLL-256-NEXT: [[CAST_FIXED_D_1:%.*]] = tail call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[MUL_1]], i64 0)
63+
; UNROLL-256-NEXT: [[GEP_D_1:%.*]] = getelementptr inbounds <8 x float>, ptr [[D]], i64 0, i64 1
64+
; UNROLL-256-NEXT: store <8 x float> [[CAST_FIXED_D_1]], ptr [[GEP_D_1]], align 16
6165
; UNROLL-256-NEXT: ret void
6266
;
6367
entry:

0 commit comments

Comments
 (0)