Skip to content

Commit 94eb3ec

Browse files
committed
merge main into amd-staging
Change-Id: Ifbc83fa0044fbba9b5ae5e10a3a33c378f96ee9e
2 parents ffe8ddb + 435aa76 commit 94eb3ec

File tree

18 files changed

+755
-70
lines changed

18 files changed

+755
-70
lines changed

.github/workflows/containers/github-action-ci/stage1.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ RUN cmake -B ./build -G Ninja ./llvm \
3737
-DLLVM_ENABLE_RUNTIMES="compiler-rt" \
3838
-DCMAKE_INSTALL_PREFIX="$LLVM_SYSROOT" \
3939
-DLLVM_ENABLE_PROJECTS="bolt;clang;lld;clang-tools-extra" \
40-
-DLLVM_DISTRIBUTION_COMPONENTS="lld;compiler-rt;clang-format" \
40+
-DLLVM_DISTRIBUTION_COMPONENTS="lld;compiler-rt;clang-format;scan-build" \
4141
-DCLANG_DEFAULT_LINKER="lld" \
4242
-DBOOTSTRAP_CLANG_PGO_TRAINING_DATA_SOURCE_DIR=/llvm-project-llvmorg-$LLVM_VERSION/llvm
4343

clang-tools-extra/clang-tidy/misc/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ add_clang_library(clangTidyMiscModule
4343
UseAnonymousNamespaceCheck.cpp
4444

4545
LINK_LIBS
46-
clangAnalysis
4746
clangTidy
4847
clangTidyUtils
4948

clang/lib/Sema/Scope.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,11 @@ void Scope::dumpImpl(raw_ostream &OS) const {
228228
{CompoundStmtScope, "CompoundStmtScope"},
229229
{ClassInheritanceScope, "ClassInheritanceScope"},
230230
{CatchScope, "CatchScope"},
231+
{ConditionVarScope, "ConditionVarScope"},
232+
{OpenMPOrderClauseScope, "OpenMPOrderClauseScope"},
233+
{LambdaScope, "LambdaScope"},
231234
{OpenACCComputeConstructScope, "OpenACCComputeConstructScope"},
235+
{TypeAliasScope, "TypeAliasScope"},
232236
{FriendScope, "FriendScope"},
233237
};
234238

flang/lib/Lower/OpenMP/Clauses.h

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,30 +36,64 @@ struct TypeTy : public evaluate::SomeType {
3636
bool operator==(const TypeTy &t) const { return true; }
3737
};
3838

39-
using IdTy = semantics::Symbol *;
39+
template <typename ExprTy>
40+
struct IdTyTemplate {
41+
// "symbol" is always non-null for ids of actual objects.
42+
Fortran::semantics::Symbol *symbol;
43+
std::optional<ExprTy> designator;
44+
45+
bool operator==(const IdTyTemplate &other) const {
46+
// If symbols are different, then the objects are different.
47+
if (symbol != other.symbol)
48+
return false;
49+
if (symbol == nullptr)
50+
return true;
51+
// Equal symbols don't necessarily indicate identical objects,
52+
// for example, a derived object component may use a single symbol,
53+
// which will refer to different objects for different designators,
54+
// e.g. a%c and b%c.
55+
return designator == other.designator;
56+
}
57+
58+
operator bool() const { return symbol != nullptr; }
59+
};
60+
4061
using ExprTy = SomeExpr;
4162

4263
template <typename T>
4364
using List = tomp::ListT<T>;
4465
} // namespace Fortran::lower::omp
4566

67+
// Specialization of the ObjectT template
4668
namespace tomp::type {
4769
template <>
48-
struct ObjectT<Fortran::lower::omp::IdTy, Fortran::lower::omp::ExprTy> {
49-
using IdTy = Fortran::lower::omp::IdTy;
70+
struct ObjectT<Fortran::lower::omp::IdTyTemplate<Fortran::lower::omp::ExprTy>,
71+
Fortran::lower::omp::ExprTy> {
72+
using IdTy = Fortran::lower::omp::IdTyTemplate<Fortran::lower::omp::ExprTy>;
5073
using ExprTy = Fortran::lower::omp::ExprTy;
5174

52-
IdTy id() const { return symbol; }
53-
Fortran::semantics::Symbol *sym() const { return symbol; }
54-
const std::optional<ExprTy> &ref() const { return designator; }
75+
IdTy id() const { return identity; }
76+
Fortran::semantics::Symbol *sym() const { return identity.symbol; }
77+
const std::optional<ExprTy> &ref() const { return identity.designator; }
5578

56-
IdTy symbol;
57-
std::optional<ExprTy> designator;
79+
IdTy identity;
5880
};
5981
} // namespace tomp::type
6082

6183
namespace Fortran::lower::omp {
84+
using IdTy = IdTyTemplate<ExprTy>;
85+
}
6286

87+
namespace std {
88+
template <>
89+
struct hash<Fortran::lower::omp::IdTy> {
90+
size_t operator()(const Fortran::lower::omp::IdTy &id) const {
91+
return static_cast<size_t>(reinterpret_cast<uintptr_t>(id.symbol));
92+
}
93+
};
94+
} // namespace std
95+
96+
namespace Fortran::lower::omp {
6397
using Object = tomp::ObjectT<IdTy, ExprTy>;
6498
using ObjectList = tomp::ObjectListT<IdTy, ExprTy>;
6599

flang/lib/Lower/OpenMP/Utils.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ void addChildIndexAndMapToParent(
188188
std::map<const semantics::Symbol *,
189189
llvm::SmallVector<OmpMapMemberIndicesData>> &parentMemberIndices,
190190
mlir::omp::MapInfoOp &mapOp, semantics::SemanticsContext &semaCtx) {
191-
std::optional<evaluate::DataRef> dataRef = ExtractDataRef(object.designator);
191+
std::optional<evaluate::DataRef> dataRef = ExtractDataRef(object.ref());
192192
assert(dataRef.has_value() &&
193193
"DataRef could not be extracted during mapping of derived type "
194194
"cannot proceed");
Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,22 @@
11
! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
22
! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s
33

4-
! CHECK: %[[V0:[0-9]+]] = fir.alloca !fir.type<_QFfooTt0{a0:i32,a1:i32}> {bindc_name = "a", uniq_name = "_QFfooEa"}
5-
! CHECK: %[[V1:[0-9]+]]:2 = hlfir.declare %[[V0]] {uniq_name = "_QFfooEa"} : (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) -> (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>, !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>)
6-
! CHECK: %[[V2:[0-9]+]] = hlfir.designate %[[V1]]#0{"a1"} : (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) -> !fir.ref<i32>
4+
! CHECK-LABEL: func.func @_QPfoo1
5+
! CHECK: %[[V0:[0-9]+]] = fir.alloca !fir.type<_QFfoo1Tt0{a0:i32,a1:i32}> {bindc_name = "a", uniq_name = "_QFfoo1Ea"}
6+
! CHECK: %[[V1:[0-9]+]]:2 = hlfir.declare %[[V0]] {uniq_name = "_QFfoo1Ea"} : (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) -> (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>, !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>)
7+
! CHECK: %[[V2:[0-9]+]] = hlfir.designate %[[V1]]#0{"a1"} : (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) -> !fir.ref<i32>
78
! CHECK: %[[V3:[0-9]+]] = omp.map.info var_ptr(%[[V2]] : !fir.ref<i32>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "a%a1"}
8-
! CHECK: %[[V4:[0-9]+]] = omp.map.info var_ptr(%[[V1]]#1 : !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>, !fir.type<_QFfooTt0{a0:i32,a1:i32}>) map_clauses(tofrom) capture(ByRef) members(%[[V3]] : [1] : !fir.ref<i32>) -> !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>> {name = "a", partial_map = true}
9-
! CHECK: omp.target map_entries(%[[V3]] -> %arg0, %[[V4]] -> %arg1 : !fir.ref<i32>, !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) {
10-
! CHECK: ^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>):
11-
! CHECK: %[[V5:[0-9]+]]:2 = hlfir.declare %arg1 {uniq_name = "_QFfooEa"} : (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) -> (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>, !fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>)
9+
! CHECK: %[[V4:[0-9]+]] = omp.map.info var_ptr(%[[V1]]#1 : !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>, !fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>) map_clauses(tofrom) capture(ByRef) members(%[[V3]] : [1] : !fir.ref<i32>) -> !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>> {name = "a", partial_map = true}
10+
! CHECK: omp.target map_entries(%[[V3]] -> %arg0, %[[V4]] -> %arg1 : !fir.ref<i32>, !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) {
11+
! CHECK: ^bb0(%arg0: !fir.ref<i32>, %arg1: !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>):
12+
! CHECK: %[[V5:[0-9]+]]:2 = hlfir.declare %arg1 {uniq_name = "_QFfoo1Ea"} : (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) -> (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>, !fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>)
1213
! CHECK: %c0_i32 = arith.constant 0 : i32
13-
! CHECK: %[[V6:[0-9]+]] = hlfir.designate %[[V5]]#0{"a1"} : (!fir.ref<!fir.type<_QFfooTt0{a0:i32,a1:i32}>>) -> !fir.ref<i32>
14+
! CHECK: %[[V6:[0-9]+]] = hlfir.designate %[[V5]]#0{"a1"} : (!fir.ref<!fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>>) -> !fir.ref<i32>
1415
! CHECK: hlfir.assign %c0_i32 to %[[V6]] : i32, !fir.ref<i32>
1516
! CHECK: omp.terminator
1617
! CHECK: }
1718

18-
subroutine foo()
19+
subroutine foo1()
1920
implicit none
2021

2122
type t0
@@ -29,3 +30,25 @@ subroutine foo()
2930
!$omp end target
3031
end
3132

33+
34+
! CHECK-LABEL: func.func @_QPfoo2
35+
! CHECK-DAG: omp.map.info var_ptr(%{{[0-9]+}} : {{.*}} map_clauses(to) capture(ByRef) bounds(%{{[0-9]+}}) -> {{.*}} {name = "t%b(1_8)%a(1)"}
36+
! CHECK-DAG: omp.map.info var_ptr(%{{[0-9]+}} : {{.*}} map_clauses(from) capture(ByRef) bounds(%{{[0-9]+}}) -> {{.*}} {name = "u%b(1_8)%a(1)"}
37+
subroutine foo2()
38+
implicit none
39+
40+
type t0
41+
integer :: a(10)
42+
end type
43+
44+
type t1
45+
type(t0) :: b(10)
46+
end type
47+
48+
type(t1) :: t, u
49+
50+
!$omp target map(to: t%b(1)%a(1)) map(from: u%b(1)%a(1))
51+
t%b(1)%a(1) = u%b(1)%a(1)
52+
!$omp end target
53+
54+
end

libc/config/linux/aarch64/entrypoints.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,10 @@ if(LIBC_TYPES_HAS_FLOAT16)
516516
libc.src.math.nearbyintf16
517517
libc.src.math.nextafterf16
518518
libc.src.math.nextdownf16
519-
libc.src.math.nexttowardf16
519+
# Temporarily disable nexttowardf16 on aarch64 because the conversion
520+
# between _Float16 and long double will crash clang-11. This is fixed in
521+
# clang-12 and after: https://godbolt.org/z/8ceT9454c
522+
# libc.src.math.nexttowardf16
520523
libc.src.math.nextupf16
521524
libc.src.math.rintf16
522525
libc.src.math.roundf16

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9159,6 +9159,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
91599159
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
91609160
isOperationLegal(ISD::SMAX, VT)) {
91619161
SDValue Zero = DAG.getConstant(0, dl, VT);
9162+
Op = DAG.getFreeze(Op);
91629163
return DAG.getNode(ISD::SMAX, dl, VT, Op,
91639164
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
91649165
}
@@ -9175,8 +9176,8 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
91759176
// 0 - abs(x) -> smin(x, sub(0,x))
91769177
if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
91779178
isOperationLegal(ISD::SMIN, VT)) {
9178-
Op = DAG.getFreeze(Op);
91799179
SDValue Zero = DAG.getConstant(0, dl, VT);
9180+
Op = DAG.getFreeze(Op);
91809181
return DAG.getNode(ISD::SMIN, dl, VT, Op,
91819182
DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
91829183
}

llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,12 @@ static bool shouldInspect(MachineInstr &MI) {
115115
return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI);
116116
}
117117

118+
static bool isHorizontalReduction(const MachineInstr &MI) {
119+
const MCInstrDesc &MCID = MI.getDesc();
120+
uint64_t Flags = MCID.TSFlags;
121+
return (Flags & ARMII::HorizontalReduction) != 0;
122+
}
123+
118124
namespace {
119125

120126
using InstSet = SmallPtrSetImpl<MachineInstr *>;
@@ -275,6 +281,16 @@ namespace {
275281
if (VPT->getOpcode() == ARM::MVE_VPST)
276282
return false;
277283

284+
// If the VPT block does not define something that is an "output", then
285+
// the tail-predicated version will just perform a subset of the original
286+
VPT block, where the last lanes should not be used.
287+
if (isVPTOpcode(VPT->getOpcode()) &&
288+
all_of(Block.getInsts(), [](const MachineInstr *MI) {
289+
return !MI->mayStore() && !MI->mayLoad() &&
290+
!isHorizontalReduction(*MI) && !isVCTP(MI);
291+
}))
292+
return true;
293+
278294
auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) {
279295
MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx));
280296
return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op);
@@ -813,12 +829,6 @@ static bool producesDoubleWidthResult(const MachineInstr &MI) {
813829
return (Flags & ARMII::DoubleWidthResult) != 0;
814830
}
815831

816-
static bool isHorizontalReduction(const MachineInstr &MI) {
817-
const MCInstrDesc &MCID = MI.getDesc();
818-
uint64_t Flags = MCID.TSFlags;
819-
return (Flags & ARMII::HorizontalReduction) != 0;
820-
}
821-
822832
// Can this instruction generate a non-zero result when given only zeroed
823833
// operands? This allows us to know that, given operands with false bytes
824834
// zeroed by masked loads, that the result will also contain zeros in those

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
171171
// Set operations for 'F' feature.
172172

173173
if (Subtarget.hasBasicF()) {
174+
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
175+
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
174176
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
175177

176178
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
@@ -186,6 +188,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
186188
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
187189
setOperationAction(ISD::FPOW, MVT::f32, Expand);
188190
setOperationAction(ISD::FREM, MVT::f32, Expand);
191+
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
192+
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
189193

190194
if (Subtarget.is64Bit())
191195
setOperationAction(ISD::FRINT, MVT::f32, Legal);
@@ -202,7 +206,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
202206
// Set operations for 'D' feature.
203207

204208
if (Subtarget.hasBasicD()) {
209+
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
205210
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
211+
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
206212
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
207213
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
208214

@@ -219,6 +225,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
219225
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
220226
setOperationAction(ISD::FPOW, MVT::f64, Expand);
221227
setOperationAction(ISD::FREM, MVT::f64, Expand);
228+
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
229+
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
222230

223231
if (Subtarget.is64Bit())
224232
setOperationAction(ISD::FRINT, MVT::f64, Legal);

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5120,6 +5120,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
51205120
case Intrinsic::x86_tileloaddt164_internal: {
51215121
if (!Subtarget->hasAMXTILE())
51225122
break;
5123+
auto *MFI =
5124+
CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5125+
MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
51235126
unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal
51245127
? X86::PTILELOADDV
51255128
: X86::PTILELOADDT1V;
@@ -5201,6 +5204,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
52015204
break;
52025205
}
52035206
case Intrinsic::x86_tilestored64_internal: {
5207+
auto *MFI =
5208+
CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5209+
MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
52045210
unsigned Opc = X86::PTILESTOREDV;
52055211
// _tile_stored_internal(row, col, buf, STRIDE, c)
52065212
SDValue Base = Node->getOperand(4);
@@ -5228,6 +5234,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
52285234
case Intrinsic::x86_tilestored64: {
52295235
if (!Subtarget->hasAMXTILE())
52305236
break;
5237+
auto *MFI =
5238+
CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
5239+
MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
52315240
unsigned Opc;
52325241
switch (IntNo) {
52335242
default: llvm_unreachable("Unexpected intrinsic!");

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26790,7 +26790,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
2679026790
case Intrinsic::swift_async_context_addr: {
2679126791
SDLoc dl(Op);
2679226792
auto &MF = DAG.getMachineFunction();
26793-
auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
26793+
auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
2679426794
if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF)) {
2679526795
MF.getFrameInfo().setFrameAddressIsTaken(true);
2679626796
X86FI->setHasSwiftAsyncContext(true);
@@ -36795,7 +36795,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3679536795
}
3679636796
case TargetOpcode::PREALLOCATED_SETUP: {
3679736797
assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
36798-
auto MFI = MF->getInfo<X86MachineFunctionInfo>();
36798+
auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
3679936799
MFI->setHasPreallocatedCall(true);
3680036800
int64_t PreallocatedId = MI.getOperand(0).getImm();
3680136801
size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
@@ -36812,7 +36812,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3681236812
assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
3681336813
int64_t PreallocatedId = MI.getOperand(1).getImm();
3681436814
int64_t ArgIdx = MI.getOperand(2).getImm();
36815-
auto MFI = MF->getInfo<X86MachineFunctionInfo>();
36815+
auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
3681636816
size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
3681736817
LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
3681836818
<< ", arg offset " << ArgOffset << "\n");
@@ -36855,6 +36855,13 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
3685536855
unsigned Imm = MI.getOperand(0).getImm();
3685636856
BuildMI(*BB, MI, MIMD, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
3685736857
MI.eraseFromParent(); // The pseudo is gone now.
36858+
auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
36859+
MFI->setAMXProgModel(AMXProgModelEnum::DirectReg);
36860+
return BB;
36861+
}
36862+
case X86::PTILEZEROV: {
36863+
auto *MFI = MF->getInfo<X86MachineFunctionInfo>();
36864+
MFI->setAMXProgModel(AMXProgModelEnum::ManagedRA);
3685836865
return BB;
3685936866
}
3686036867
case X86::PTILELOADD:

llvm/lib/Target/X86/X86InstrAMX.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ let SchedRW = [WriteSystem] in {
7474
GR16:$src2, opaquemem:$src3,
7575
TILE:$src4), []>;
7676
let isPseudo = true, isReMaterializable = 1, isAsCheapAsAMove = 1,
77-
canFoldAsLoad = 1 in
77+
canFoldAsLoad = 1, usesCustomInserter = 1 in
7878
def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2),
7979
[(set TILE:$dst, (int_x86_tilezero_internal
8080
GR16:$src1, GR16:$src2))]>;

0 commit comments

Comments
 (0)