Skip to content

Commit 37b3bbc

Browse files
committed
Skip unpacked SVE types
1 parent b5b9931 commit 37b3bbc

File tree

2 files changed

+34
-6
lines changed

2 files changed

+34
-6
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -581,13 +581,20 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
581581
LLVMContext &C = RetTy->getContext();
582582
EVT MRTy = getTLI()->getValueType(DL, RetTy);
583583
EVT MPTy = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
584+
// Skip this if either the return type or the vector argument are unpacked
585+
// SVE types; they may get lowered to stack stores and loads.
586+
if ((MRTy.isScalableVector() &&
587+
MRTy.getSizeInBits().getKnownMinValue() != AArch64::SVEBitsPerBlock) ||
588+
(MPTy.isScalableVector() &&
589+
MPTy.getSizeInBits().getKnownMinValue() != AArch64::SVEBitsPerBlock))
590+
break;
584591
TargetLoweringBase::LegalizeKind RLK = getTLI()->getTypeConversion(C, MRTy);
585592
TargetLoweringBase::LegalizeKind PLK = getTLI()->getTypeConversion(C, MPTy);
586593
const ConstantInt *Idx = dyn_cast<ConstantInt>(ICA.getArgs()[1]);
587594
if (RLK.first == TargetLoweringBase::TypeLegal &&
588595
PLK.first == TargetLoweringBase::TypeLegal && Idx &&
589596
Idx->getZExtValue() == 0)
590-
return TTI::TCC_Basic;
597+
return TTI::TCC_Free;
591598
break;
592599
}
593600
case Intrinsic::vector_insert: {
@@ -603,13 +610,20 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
603610
LLVMContext &C = RetTy->getContext();
604611
EVT MTy0 = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
605612
EVT MTy1 = getTLI()->getValueType(DL, ICA.getArgTypes()[1]);
613+
// Skip this if either type is an unpacked SVE type; they may get lowered
614+
// to stack stores and loads.
615+
if ((MTy0.isScalableVector() &&
616+
MTy0.getSizeInBits().getKnownMinValue() != AArch64::SVEBitsPerBlock) ||
617+
(MTy1.isScalableVector() &&
618+
MTy1.getSizeInBits().getKnownMinValue() != AArch64::SVEBitsPerBlock))
619+
break;
606620
TargetLoweringBase::LegalizeKind LK0 = getTLI()->getTypeConversion(C, MTy0);
607621
TargetLoweringBase::LegalizeKind LK1 = getTLI()->getTypeConversion(C, MTy1);
608622
const ConstantInt *Idx = dyn_cast<ConstantInt>(ICA.getArgs()[2]);
609623
if (LK0.first == TargetLoweringBase::TypeLegal &&
610624
LK1.first == TargetLoweringBase::TypeLegal && Idx &&
611625
Idx->getZExtValue() == 0)
612-
return TTI::TCC_Basic;
626+
return TTI::TCC_Free;
613627
break;
614628
}
615629
case Intrinsic::bitreverse: {

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,13 @@ declare <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x
3232

3333
define void @vector_insert_extract_idxzero_128b() #1 {
3434
; CHECK-LABEL: 'vector_insert_extract_idxzero_128b'
35-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0)
36-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0)
35+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0)
36+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0)
3737
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
3838
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
3939
; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
40+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
41+
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
4042
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
4143
;
4244
; TYPE_BASED_ONLY-LABEL: 'vector_insert_extract_idxzero_128b'
@@ -45,27 +47,35 @@ define void @vector_insert_extract_idxzero_128b() #1 {
4547
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
4648
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
4749
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
50+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
51+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
4852
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
4953
;
5054
%insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0)
5155
%extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0)
5256
%insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
5357
%extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
5458
%extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
59+
%insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
60+
%extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
5561
ret void
5662
}
5763
declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
5864
declare <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double>, i64)
5965
declare <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1>, <vscale x 2 x i1>, i64)
6066
declare <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1>, i64)
67+
declare <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float>, <2 x float>, i64)
68+
declare <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half>, i64)
6169

6270
define void @vector_insert_extract_idxzero_256b() #2 {
6371
; CHECK-LABEL: 'vector_insert_extract_idxzero_256b'
64-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0)
65-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> undef, i64 0)
72+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0)
73+
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> undef, i64 0)
6674
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
6775
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
6876
; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
77+
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
78+
; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
6979
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
7080
;
7181
; TYPE_BASED_ONLY-LABEL: 'vector_insert_extract_idxzero_256b'
@@ -74,13 +84,17 @@ define void @vector_insert_extract_idxzero_256b() #2 {
7484
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
7585
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
7686
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
87+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
88+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
7789
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
7890
;
7991
%insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0)
8092
%extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nx4f32(<vscale x 4 x float> undef, i64 0)
8193
%insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
8294
%extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
8395
%extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
96+
%insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
97+
%extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
8498
ret void
8599
}
86100
declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16>, <16 x i16>, i64)

0 commit comments

Comments
 (0)