Skip to content

Commit 6731da8

Browse files
SC llvm team
authored and committed
Merged main:74724902ba2f into amd-gfx:d73ca634171d
Local branch amd-gfx d73ca63 Merged main:02d27eac0f3f into amd-gfx:4e43160dfb0f Remote branch main 7472490 [AArch64] Split Ampere1Write_Arith into rr/ri and rs/rx InstRWs. (llvm#66384)
2 parents d73ca63 + 7472490 commit 6731da8

File tree

53 files changed

+1233
-175
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+1233
-175
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17392,14 +17392,22 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1739217392
case AMDGPU::BI__builtin_amdgcn_log_clampf:
1739317393
return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
1739417394
case AMDGPU::BI__builtin_amdgcn_ldexp:
17395-
case AMDGPU::BI__builtin_amdgcn_ldexpf:
17396-
case AMDGPU::BI__builtin_amdgcn_ldexph: {
17395+
case AMDGPU::BI__builtin_amdgcn_ldexpf: {
1739717396
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
1739817397
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
1739917398
llvm::Function *F =
1740017399
CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});
1740117400
return Builder.CreateCall(F, {Src0, Src1});
1740217401
}
17402+
case AMDGPU::BI__builtin_amdgcn_ldexph: {
17403+
// The raw instruction has a different behavior for out of bounds exponent
17404+
// values (implicit truncation instead of saturate to short_min/short_max).
17405+
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
17406+
llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
17407+
llvm::Function *F =
17408+
CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});
17409+
return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});
17410+
}
1740317411
case AMDGPU::BI__builtin_amdgcn_frexp_mant:
1740417412
case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
1740517413
case AMDGPU::BI__builtin_amdgcn_frexp_manth:

clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ void test_cos_f16(global half* out, half a)
5252
}
5353

5454
// CHECK-LABEL: @test_ldexp_f16
55-
// CHECK: call half @llvm.ldexp.f16.i32
55+
// CHECK: [[TRUNC:%[0-9a-z]+]] = trunc i32
56+
// CHECK: call half @llvm.ldexp.f16.i16(half %a, i16 [[TRUNC]])
5657
void test_ldexp_f16(global half* out, half a, int b)
5758
{
5859
*out = __builtin_amdgcn_ldexph(a, b);

flang/lib/Lower/ConvertVariable.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,11 @@ static bool needEndFinalization(const Fortran::lower::pft::Variable &var) {
660660
if (!var.hasSymbol())
661661
return false;
662662
const Fortran::semantics::Symbol &sym = var.getSymbol();
663+
const Fortran::semantics::Scope &owner = sym.owner();
664+
if (owner.kind() == Fortran::semantics::Scope::Kind::MainProgram) {
665+
// The standard does not require finalizing main program variables.
666+
return false;
667+
}
663668
if (!Fortran::semantics::IsPointer(sym) &&
664669
!Fortran::semantics::IsAllocatable(sym) &&
665670
!Fortran::semantics::IsDummy(sym) &&

flang/lib/Lower/IO.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -655,7 +655,7 @@ static void genNamelistIO(Fortran::lower::AbstractConverter &converter,
655655
static mlir::func::FuncOp getOutputFunc(mlir::Location loc,
656656
fir::FirOpBuilder &builder,
657657
mlir::Type type, bool isFormatted) {
658-
if (type.isa<fir::RecordType>())
658+
if (fir::unwrapPassByRefType(type).isa<fir::RecordType>())
659659
return getIORuntimeFunc<mkIOKey(OutputDerivedType)>(loc, builder);
660660
if (!isFormatted)
661661
return getIORuntimeFunc<mkIOKey(OutputDescriptor)>(loc, builder);
@@ -737,7 +737,7 @@ static void genOutputItemList(
737737
if (argType.isa<fir::BoxType>()) {
738738
mlir::Value box = fir::getBase(converter.genExprBox(loc, *expr, stmtCtx));
739739
outputFuncArgs.push_back(builder.createConvert(loc, argType, box));
740-
if (itemTy.isa<fir::RecordType>())
740+
if (fir::unwrapPassByRefType(itemTy).isa<fir::RecordType>())
741741
outputFuncArgs.push_back(getNonTbpDefinedIoTableAddr(converter));
742742
} else if (helper.isCharacterScalar(itemTy)) {
743743
fir::ExtendedValue exv = converter.genExprAddr(loc, expr, stmtCtx);
@@ -772,7 +772,7 @@ static void genOutputItemList(
772772
static mlir::func::FuncOp getInputFunc(mlir::Location loc,
773773
fir::FirOpBuilder &builder,
774774
mlir::Type type, bool isFormatted) {
775-
if (type.isa<fir::RecordType>())
775+
if (fir::unwrapPassByRefType(type).isa<fir::RecordType>())
776776
return getIORuntimeFunc<mkIOKey(InputDerivedType)>(loc, builder);
777777
if (!isFormatted)
778778
return getIORuntimeFunc<mkIOKey(InputDescriptor)>(loc, builder);
@@ -834,7 +834,7 @@ createIoRuntimeCallForItem(Fortran::lower::AbstractConverter &converter,
834834
auto boxTy = box.getType().dyn_cast<fir::BaseBoxType>();
835835
assert(boxTy && "must be previously emboxed");
836836
inputFuncArgs.push_back(builder.createConvert(loc, argType, box));
837-
if (boxTy.getEleTy().isa<fir::RecordType>())
837+
if (fir::unwrapPassByRefType(boxTy).isa<fir::RecordType>())
838838
inputFuncArgs.push_back(getNonTbpDefinedIoTableAddr(converter));
839839
} else {
840840
mlir::Value itemAddr = fir::getBase(item);

flang/test/Lower/derived-type-finalization.f90

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -248,20 +248,7 @@ subroutine local_t4()
248248
program p
249249
use derived_type_finalization
250250
type(t1) :: t
251-
if (t%a == 10) return
252-
print *, 'end of program'
253251
end program
254252

255253
! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "p"} {
256-
! CHECK: %[[T:.*]] = fir.alloca !fir.type<_QMderived_type_finalizationTt1{a:i32}> {bindc_name = "t", uniq_name = "_QFEt"}
257-
! CHECK: cf.cond_br %{{.*}}, ^bb1, ^bb2
258-
! CHECK: ^bb1:
259-
! CHECK: %[[EMBOX:.*]] = fir.embox %[[T]] : (!fir.ref<!fir.type<_QMderived_type_finalizationTt1{a:i32}>>) -> !fir.box<!fir.type<_QMderived_type_finalizationTt1{a:i32}>>
260-
! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[EMBOX]] : (!fir.box<!fir.type<_QMderived_type_finalizationTt1{a:i32}>>) -> !fir.box<none>
261-
! CHECK: %{{.*}} = fir.call @_FortranADestroy(%[[BOX_NONE]]) {{.*}} : (!fir.box<none>) -> none
262-
! CHECK: return
263-
! CHECK: ^bb2:
264-
! CHECK: %[[EMBOX:.*]] = fir.embox %[[T]] : (!fir.ref<!fir.type<_QMderived_type_finalizationTt1{a:i32}>>) -> !fir.box<!fir.type<_QMderived_type_finalizationTt1{a:i32}>>
265-
! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[EMBOX]] : (!fir.box<!fir.type<_QMderived_type_finalizationTt1{a:i32}>>) -> !fir.box<none>
266-
! CHECK: %{{.*}} = fir.call @_FortranADestroy(%[[BOX_NONE]]) {{.*}} : (!fir.box<none>) -> none
267-
! CHECK: return
254+
! CHECK-NOT: fir.call @_FortranADestroy
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
! Check that InputDerivedType/OutputDerivedType APIs are used
2+
! for io of derived types.
3+
! RUN: bbc -polymorphic-type -emit-fir -o - %s | FileCheck %s
4+
5+
module p
6+
type :: person
7+
type(person), pointer :: next => null()
8+
end type person
9+
type :: club
10+
class(person), allocatable :: membership(:)
11+
end type club
12+
contains
13+
subroutine pwf (dtv,unit,iotype,vlist,iostat,iomsg)
14+
class(person), intent(in) :: dtv
15+
integer, intent(in) :: unit
16+
character (len=*), intent(in) :: iotype
17+
integer, intent(in) :: vlist(:)
18+
integer, intent(out) :: iostat
19+
character (len=*), intent(inout) :: iomsg
20+
print *, 'write'
21+
end subroutine pwf
22+
subroutine prf (dtv,unit,iotype,vlist,iostat,iomsg)
23+
class(person), intent(inout) :: dtv
24+
integer, intent(in) :: unit
25+
character (len=*), intent(in) :: iotype
26+
integer, intent(in) :: vlist(:)
27+
integer, intent(out) :: iostat
28+
character (len=*), intent(inout) :: iomsg
29+
end subroutine prf
30+
subroutine test1(dtv)
31+
interface read(formatted)
32+
module procedure prf
33+
end interface read(formatted)
34+
class(person), intent(inout) :: dtv
35+
read(7, fmt='(DT)') dtv%next
36+
end subroutine test1
37+
! CHECK-LABEL: func.func @_QMpPtest1(
38+
! CHECK: %{{.*}} = fir.call @_FortranAioInputDerivedType(%{{.*}}, %{{.*}}, %{{.*}}) fastmath<contract> : (!fir.ref<i8>, !fir.box<none>, !fir.ref<none>) -> i1
39+
40+
subroutine test2(social_club)
41+
interface read(formatted)
42+
module procedure prf
43+
end interface read(formatted)
44+
class(club) :: social_club
45+
read(7, fmt='(DT)') social_club%membership(0)
46+
end subroutine test2
47+
! CHECK-LABEL: func.func @_QMpPtest2(
48+
! CHECK: %{{.*}} = fir.call @_FortranAioInputDerivedType(%{{.*}}, %{{.*}}, %{{.*}}) fastmath<contract> : (!fir.ref<i8>, !fir.box<none>, !fir.ref<none>) -> i1
49+
50+
subroutine test3(dtv)
51+
interface write(formatted)
52+
module procedure pwf
53+
end interface write(formatted)
54+
class(person), intent(inout) :: dtv
55+
write(7, fmt='(DT)') dtv%next
56+
end subroutine test3
57+
! CHECK-LABEL: func.func @_QMpPtest3(
58+
! CHECK: %{{.*}} = fir.call @_FortranAioOutputDerivedType(%{{.*}}, %{{.*}}, %{{.*}}) fastmath<contract> : (!fir.ref<i8>, !fir.box<none>, !fir.ref<none>) -> i1
59+
60+
subroutine test4(social_club)
61+
interface write(formatted)
62+
module procedure pwf
63+
end interface write(formatted)
64+
class(club) :: social_club
65+
write(7, fmt='(DT)') social_club%membership(0)
66+
end subroutine test4
67+
! CHECK-LABEL: func.func @_QMpPtest4(
68+
! CHECK: %{{.*}} = fir.call @_FortranAioOutputDerivedType(%{{.*}}, %{{.*}}, %{{.*}}) fastmath<contract> : (!fir.ref<i8>, !fir.box<none>, !fir.ref<none>) -> i1
69+
end module p
70+

flang/test/Lower/polymorphic.f90

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -766,7 +766,7 @@ subroutine test_polymorphic_io()
766766
! CHECK: %[[P:.*]] = fir.alloca !fir.class<!fir.ptr<!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>>> {bindc_name = "p", uniq_name = "_QMpolymorphic_testFtest_polymorphic_ioEp"}
767767
! CHECK: %[[LOAD_P:.*]] = fir.load %[[P]] : !fir.ref<!fir.class<!fir.ptr<!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>>>>
768768
! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[LOAD_P]] : (!fir.class<!fir.ptr<!fir.type<_QMpolymorphic_testTp1{a:i32,b:i32}>>>) -> !fir.box<none>
769-
! CHECK: %{{.*}} = fir.call @_FortranAioInputDescriptor(%{{.*}}, %[[BOX_NONE]]) {{.*}} : (!fir.ref<i8>, !fir.box<none>) -> i1
769+
! CHECK: %{{.*}} = fir.call @_FortranAioInputDerivedType(%{{.*}}, %[[BOX_NONE]], %{{.*}}) {{.*}} : (!fir.ref<i8>, !fir.box<none>, !fir.ref<none>) -> i1
770770

771771
function unlimited_polymorphic_alloc_array_ret()
772772
class(*), allocatable :: unlimited_polymorphic_alloc_array_ret(:)

llvm/CODE_OWNERS.TXT

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,6 @@ E: [email protected]
7575
7676
D: MCA, llvm-mca
7777

78-
N: Duncan P. N. Exon Smith
79-
80-
D: Branch weights and BlockFrequencyInfo
81-
8278
N: Hal Finkel
8379
8480
D: The loop reroller and alias analysis

llvm/docs/LangRef.rst

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -742,16 +742,16 @@ an optional list of attached :ref:`metadata <metadata>`.
742742
Variables and aliases can have a
743743
:ref:`Thread Local Storage Model <tls_model>`.
744744

745-
:ref:`Scalable vectors <t_vector>` cannot be global variables or members of
746-
arrays because their size is unknown at compile time. They are allowed in
747-
structs to facilitate intrinsics returning multiple values. Generally, structs
748-
containing scalable vectors are not considered "sized" and cannot be used in
749-
loads, stores, allocas, or GEPs. The only exception to this rule is for structs
750-
that contain scalable vectors of the same type (e.g. ``{<vscale x 2 x i32>,
751-
<vscale x 2 x i32>}`` contains the same type while ``{<vscale x 2 x i32>,
752-
<vscale x 2 x i64>}`` doesn't). These kinds of structs (we may call them
753-
homogeneous scalable vector structs) are considered sized and can be used in
754-
loads, stores, allocas, but not GEPs.
745+
Globals cannot be or contain :ref:`Scalable vectors <t_vector>` because their
746+
size is unknown at compile time. Scalable vectors are allowed in structs to facilitate
747+
intrinsics returning multiple values. Generally, structs containing scalable
748+
vectors are not considered "sized" and cannot be used in loads, stores, allocas,
749+
or GEPs. The only exception to this rule is for structs that contain scalable
750+
vectors of the same type (e.g. ``{<vscale x 2 x i32>, <vscale x 2 x i32>}``
751+
contains the same type while ``{<vscale x 2 x i32>, <vscale x 2 x i64>}``
752+
doesn't). These kinds of structs (we may call them homogeneous scalable vector
753+
structs) are considered sized and can be used in loads, stores, allocas, but
754+
not GEPs.
755755

756756
Syntax::
757757

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 474717
19+
#define LLVM_MAIN_REVISION 474729
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/include/llvm/IR/Type.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,7 @@ class Type {
209209
/// Return true if this is a target extension type with a scalable layout.
210210
bool isScalableTargetExtTy() const;
211211

212-
/// Return true if this is a scalable vector type or a target extension type
213-
/// with a scalable layout.
212+
/// Return true if this is a type whose size is a known multiple of vscale.
214213
bool isScalableTy() const;
215214

216215
/// Return true if this is a FP type or a vector of FP.

llvm/lib/Analysis/InstructionSimplify.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4934,7 +4934,7 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
49344934
return UndefValue::get(GEPTy);
49354935

49364936
bool IsScalableVec =
4937-
isa<ScalableVectorType>(SrcTy) || any_of(Indices, [](const Value *V) {
4937+
SrcTy->isScalableTy() || any_of(Indices, [](const Value *V) {
49384938
return isa<ScalableVectorType>(V->getType());
49394939
});
49404940

llvm/lib/IR/Operator.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,7 @@ bool GEPOperator::accumulateConstantOffset(
127127
auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end());
128128
for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) {
129129
// Scalable vectors are multiplied by a runtime constant.
130-
bool ScalableType = false;
131-
if (isa<ScalableVectorType>(GTI.getIndexedType()))
132-
ScalableType = true;
130+
bool ScalableType = GTI.getIndexedType()->isScalableTy();
133131

134132
Value *V = GTI.getOperand();
135133
StructType *STy = GTI.getStructTypeOrNull();
@@ -189,7 +187,7 @@ bool GEPOperator::collectOffset(
189187
for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
190188
GTI != GTE; ++GTI) {
191189
// Scalable vectors are multiplied by a runtime constant.
192-
bool ScalableType = isa<ScalableVectorType>(GTI.getIndexedType());
190+
bool ScalableType = GTI.getIndexedType()->isScalableTy();
193191

194192
Value *V = GTI.getOperand();
195193
StructType *STy = GTI.getStructTypeOrNull();

llvm/lib/IR/Type.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ bool Type::isIntegerTy(unsigned Bitwidth) const {
5858
}
5959

6060
bool Type::isScalableTy() const {
61+
if (const auto *ATy = dyn_cast<ArrayType>(this))
62+
return ATy->getElementType()->isScalableTy();
6163
if (const auto *STy = dyn_cast<StructType>(this)) {
6264
SmallPtrSet<Type *, 4> Visited;
6365
return STy->containsScalableVectorType(&Visited);
@@ -658,8 +660,7 @@ ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) {
658660
bool ArrayType::isValidElementType(Type *ElemTy) {
659661
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
660662
!ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
661-
!ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy() &&
662-
!isa<ScalableVectorType>(ElemTy);
663+
!ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy();
663664
}
664665

665666
//===----------------------------------------------------------------------===//

llvm/lib/IR/Verifier.cpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -850,17 +850,9 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
850850
}
851851

852852
// Scalable vectors cannot be global variables, since we don't know
853-
// the runtime size. If the global is an array containing scalable vectors,
854-
// that will be caught by the isValidElementType methods in StructType or
855-
// ArrayType instead.
856-
Check(!isa<ScalableVectorType>(GV.getValueType()),
857-
"Globals cannot contain scalable vectors", &GV);
858-
859-
if (auto *STy = dyn_cast<StructType>(GV.getValueType())) {
860-
SmallPtrSet<Type *, 4> Visited;
861-
Check(!STy->containsScalableVectorType(&Visited),
862-
"Globals cannot contain scalable vectors", &GV);
863-
}
853+
// the runtime size.
854+
Check(!GV.getValueType()->isScalableTy(),
855+
"Globals cannot contain scalable types", &GV);
864856

865857
// Check if it's a target extension type that disallows being used as a
866858
// global.

llvm/lib/Target/AArch64/AArch64.td

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,18 @@ def FeatureD128 : SubtargetFeature<"d128", "HasD128",
570570
"and Instructions (FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128)",
571571
[FeatureLSE128]>;
572572

573+
def FeatureDisableLdp : SubtargetFeature<"disable-ldp", "HasDisableLdp",
574+
"true", "Do not emit ldp">;
575+
576+
def FeatureDisableStp : SubtargetFeature<"disable-stp", "HasDisableStp",
577+
"true", "Do not emit stp">;
578+
579+
def FeatureLdpAlignedOnly : SubtargetFeature<"ldp-aligned-only", "HasLdpAlignedOnly",
580+
"true", "In order to emit ldp, first check if the load will be aligned to 2 * element_size">;
581+
582+
def FeatureStpAlignedOnly : SubtargetFeature<"stp-aligned-only", "HasStpAlignedOnly",
583+
"true", "In order to emit stp, first check if the store will be aligned to 2 * element_size">;
584+
573585
//===----------------------------------------------------------------------===//
574586
// Architectures.
575587
//
@@ -1239,7 +1251,9 @@ def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",
12391251
FeatureArithmeticBccFusion,
12401252
FeatureCmpBccFusion,
12411253
FeatureFuseAddress,
1242-
FeatureFuseLiterals]>;
1254+
FeatureFuseLiterals,
1255+
FeatureLdpAlignedOnly,
1256+
FeatureStpAlignedOnly]>;
12431257

12441258
def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A",
12451259
"Ampere Computing Ampere-1A processors", [
@@ -1252,7 +1266,9 @@ def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A",
12521266
FeatureCmpBccFusion,
12531267
FeatureFuseAddress,
12541268
FeatureFuseLiterals,
1255-
FeatureFuseLiterals]>;
1269+
FeatureFuseLiterals,
1270+
FeatureLdpAlignedOnly,
1271+
FeatureStpAlignedOnly]>;
12561272

12571273
def ProcessorFeatures {
12581274
list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,

llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2136,6 +2136,14 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
21362136
if (!TII->isCandidateToMergeOrPair(MI))
21372137
return false;
21382138

2139+
// If the disable-ldp feature is enabled, do not emit ldp.
2140+
if (MI.mayLoad() && Subtarget->hasDisableLdp())
2141+
return false;
2142+
2143+
// If the disable-stp feature is enabled, do not emit stp.
2144+
if (MI.mayStore() && Subtarget->hasDisableStp())
2145+
return false;
2146+
21392147
// Early exit if the offset is not possible to match. (6 bits of positive
21402148
// range, plus allow an extra one in case we find a later insn that matches
21412149
// with Offset-1)
@@ -2159,6 +2167,31 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
21592167
// Keeping the iterator straight is a pain, so we let the merge routine tell
21602168
// us what the next instruction is after it's done mucking about.
21612169
auto Prev = std::prev(MBBI);
2170+
2171+
// Fetch the memoperand of the load/store that is a candidate for
2172+
// combination.
2173+
MachineMemOperand *MemOp =
2174+
MI.memoperands_empty() ? nullptr : MI.memoperands().front();
2175+
2176+
// Compute the alignments that are checked below when the
2177+
// ldp-aligned-only/stp-aligned-only features are enabled.
2178+
uint64_t MemAlignment = MemOp ? MemOp->getAlign().value() : -1;
2179+
uint64_t TypeAlignment = MemOp ? Align(MemOp->getSize()).value() : -1;
2180+
2181+
// If this is a load and the ldp-aligned-only feature is enabled, check that
2182+
// the alignment of the source pointer is at least double the alignment of
2183+
// the type.
2184+
if (MI.mayLoad() && Subtarget->hasLdpAlignedOnly() && MemOp &&
2185+
MemAlignment < 2 * TypeAlignment)
2186+
return false;
2187+
2188+
// If this is a store and the stp-aligned-only feature is enabled, check that
2189+
// the alignment of the destination pointer is at least double the alignment
2190+
// of the type.
2191+
if (MI.mayStore() && Subtarget->hasStpAlignedOnly() && MemOp &&
2192+
MemAlignment < 2 * TypeAlignment)
2193+
return false;
2194+
21622195
MBBI = mergePairedInsns(MBBI, Paired, Flags);
21632196
// Collect liveness info for instructions between Prev and the new position
21642197
// MBBI.

0 commit comments

Comments
 (0)