Skip to content

Commit 521921e

Browse files
committed
Check that the insert/extract index is 0 before providing a low cost
1 parent f8b12f7 commit 521921e

File tree

3 files changed

+44
-24
lines changed

3 files changed

+44
-24
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 22 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -569,28 +569,44 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
569569
return Cost;
570570
}
571571
case Intrinsic::vector_extract: {
572-
// If both the vector argument and the return type are legal types, then
573-
// this should be a no-op or simple operation; return a relatively low cost.
572+
// If both the vector argument and the return type are legal types and the
573+
// index is 0, then this should be a no-op or simple operation; return a
574+
// relatively low cost.
575+
576+
// If arguments aren't actually supplied, then we cannot determine the
577+
// value of the index.
578+
if (ICA.getArgs().size() < 2)
579+
break;
574580
LLVMContext &C = RetTy->getContext();
575581
EVT MRTy = getTLI()->getValueType(DL, RetTy);
576582
EVT MPTy = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
577583
TargetLoweringBase::LegalizeKind RLK = getTLI()->getTypeConversion(C, MRTy);
578584
TargetLoweringBase::LegalizeKind PLK = getTLI()->getTypeConversion(C, MPTy);
585+
const ConstantInt *Idx = dyn_cast<ConstantInt>(ICA.getArgs()[1]);
579586
if (RLK.first == TargetLoweringBase::TypeLegal &&
580-
PLK.first == TargetLoweringBase::TypeLegal)
587+
PLK.first == TargetLoweringBase::TypeLegal && Idx &&
588+
Idx->getZExtValue() == 0)
581589
return InstructionCost(1);
582590
break;
583591
}
584592
case Intrinsic::vector_insert: {
585-
// If both the vector and subvector arguments are legal types, then this
586-
// should be a no-op or simple operation; return a relatively low cost.
593+
// If both the vector and subvector arguments are legal types and the index
594+
// is 0, then this should be a no-op or simple operation; return a
595+
// relatively low cost.
596+
597+
// If arguments aren't actually supplied, then we cannot determine the
598+
// value of the index.
599+
if (ICA.getArgs().size() < 3)
600+
break;
587601
LLVMContext &C = RetTy->getContext();
588602
EVT MTy0 = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
589603
EVT MTy1 = getTLI()->getValueType(DL, ICA.getArgTypes()[1]);
590604
TargetLoweringBase::LegalizeKind LK0 = getTLI()->getTypeConversion(C, MTy0);
591605
TargetLoweringBase::LegalizeKind LK1 = getTLI()->getTypeConversion(C, MTy1);
606+
const ConstantInt *Idx = dyn_cast<ConstantInt>(ICA.getArgs()[2]);
592607
if (LK0.first == TargetLoweringBase::TypeLegal &&
593-
LK1.first == TargetLoweringBase::TypeLegal)
608+
LK1.first == TargetLoweringBase::TypeLegal && Idx &&
609+
Idx->getZExtValue() == 0)
594610
return InstructionCost(1);
595611
break;
596612
}

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,8 @@ declare <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x
3232

3333
define void @vector_insert_extract_legal_idxzero_128b(<vscale x 4 x float> %v0, <4 x float> %v1, <vscale x 2 x double> %v2) #1 {
3434
; CHECK-LABEL: 'vector_insert_extract_legal_idxzero_128b'
35-
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> %v0, <4 x float> %v1, i64 0)
36-
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> %v2, i64 0)
35+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> %v0, <4 x float> %v1, i64 0)
36+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> %v2, i64 0)
3737
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
3838
;
3939
; TYPE_BASED_ONLY-LABEL: 'vector_insert_extract_legal_idxzero_128b'
@@ -50,8 +50,8 @@ declare <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double>, i
5050

5151
define void @vector_insert_extract_legal_idxzero_256b(<vscale x 8 x i16> %v0, <16 x i16> %v1, <vscale x 4 x float> %v2) #2 {
5252
; CHECK-LABEL: 'vector_insert_extract_legal_idxzero_256b'
53-
; CHECK-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> %v0, <16 x i16> %v1, i64 0)
54-
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> %v2, i64 0)
53+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> %v0, <16 x i16> %v1, i64 0)
54+
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> %v2, i64 0)
5555
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
5656
;
5757
; TYPE_BASED_ONLY-LABEL: 'vector_insert_extract_legal_idxzero_256b'

llvm/test/Transforms/LoopUnroll/AArch64/scalable-vec-ins-ext.ll

Lines changed: 18 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -12,26 +12,30 @@ define void @test_ins_ext_cost(ptr readonly %a, ptr readonly %b, ptr readonly %c
1212
; CHECK-LABEL: define void @test_ins_ext_cost(
1313
; CHECK-SAME: ptr readonly [[A:%.*]], ptr readonly [[B:%.*]], ptr readonly [[C:%.*]], ptr noalias [[D:%.*]]) #[[ATTR0:[0-9]+]] {
1414
; CHECK-NEXT: entry:
15-
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
16-
; CHECK: for.body:
17-
; CHECK-NEXT: [[EXIT_COND:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ false, [[FOR_BODY]] ]
18-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 1, [[FOR_BODY]] ]
19-
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds <8 x float>, ptr [[A]], i64 [[IV]]
20-
; CHECK-NEXT: [[LOAD_A:%.*]] = load <8 x float>, ptr [[GEP_A]], align 16
21-
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds <8 x float>, ptr [[B]], i64 [[IV]]
22-
; CHECK-NEXT: [[LOAD_B:%.*]] = load <8 x float>, ptr [[GEP_B]], align 16
23-
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds <8 x float>, ptr [[C]], i64 [[IV]]
24-
; CHECK-NEXT: [[LOAD_C:%.*]] = load <8 x float>, ptr [[GEP_C]], align 16
15+
; CHECK-NEXT: [[LOAD_A:%.*]] = load <8 x float>, ptr [[A]], align 16
16+
; CHECK-NEXT: [[LOAD_B:%.*]] = load <8 x float>, ptr [[B]], align 16
17+
; CHECK-NEXT: [[LOAD_C:%.*]] = load <8 x float>, ptr [[C]], align 16
2518
; CHECK-NEXT: [[CAST_SCALABLE_B:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_B]], i64 0)
2619
; CHECK-NEXT: [[CAST_SCALABLE_C:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_C]], i64 0)
2720
; CHECK-NEXT: [[ADD:%.*]] = fadd <vscale x 4 x float> [[CAST_SCALABLE_B]], [[CAST_SCALABLE_C]]
2821
; CHECK-NEXT: [[CAST_SCALABLE_A:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_A]], i64 0)
2922
; CHECK-NEXT: [[MUL:%.*]] = fmul <vscale x 4 x float> [[CAST_SCALABLE_A]], [[ADD]]
3023
; CHECK-NEXT: [[CAST_FIXED_D:%.*]] = tail call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[MUL]], i64 0)
31-
; CHECK-NEXT: [[GEP_D:%.*]] = getelementptr inbounds <8 x float>, ptr [[D]], i64 0, i64 [[IV]]
32-
; CHECK-NEXT: store <8 x float> [[CAST_FIXED_D]], ptr [[GEP_D]], align 16
33-
; CHECK-NEXT: br i1 [[EXIT_COND]], label [[FOR_BODY]], label [[EXIT:%.*]]
34-
; CHECK: exit:
24+
; CHECK-NEXT: store <8 x float> [[CAST_FIXED_D]], ptr [[D]], align 16
25+
; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds <8 x float>, ptr [[A]], i64 1
26+
; CHECK-NEXT: [[LOAD_A_1:%.*]] = load <8 x float>, ptr [[GEP_A_1]], align 16
27+
; CHECK-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds <8 x float>, ptr [[B]], i64 1
28+
; CHECK-NEXT: [[LOAD_B_1:%.*]] = load <8 x float>, ptr [[GEP_B_1]], align 16
29+
; CHECK-NEXT: [[GEP_C_1:%.*]] = getelementptr inbounds <8 x float>, ptr [[C]], i64 1
30+
; CHECK-NEXT: [[LOAD_C_1:%.*]] = load <8 x float>, ptr [[GEP_C_1]], align 16
31+
; CHECK-NEXT: [[CAST_SCALABLE_B_1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_B_1]], i64 0)
32+
; CHECK-NEXT: [[CAST_SCALABLE_C_1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_C_1]], i64 0)
33+
; CHECK-NEXT: [[ADD_1:%.*]] = fadd <vscale x 4 x float> [[CAST_SCALABLE_B_1]], [[CAST_SCALABLE_C_1]]
34+
; CHECK-NEXT: [[CAST_SCALABLE_A_1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v8f32(<vscale x 4 x float> undef, <8 x float> [[LOAD_A_1]], i64 0)
35+
; CHECK-NEXT: [[MUL_1:%.*]] = fmul <vscale x 4 x float> [[CAST_SCALABLE_A_1]], [[ADD_1]]
36+
; CHECK-NEXT: [[CAST_FIXED_D_1:%.*]] = tail call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> [[MUL_1]], i64 0)
37+
; CHECK-NEXT: [[GEP_D_1:%.*]] = getelementptr inbounds <8 x float>, ptr [[D]], i64 0, i64 1
38+
; CHECK-NEXT: store <8 x float> [[CAST_FIXED_D_1]], ptr [[GEP_D_1]], align 16
3539
; CHECK-NEXT: ret void
3640
;
3741
entry:

0 commit comments

Comments
 (0)