Skip to content

Commit 7ad30b5

Browse files
SC llvm team authored and SC llvm team committed
Merged main:3f0bddb56ac3 into amd-gfx:32727405140d
Local branch amd-gfx 3272740 Merged main:f435f55d5854 into amd-gfx:863c69027cfb Remote branch main 3f0bddb Use llvm::find (NFC)
2 parents 3272740 + 3f0bddb commit 7ad30b5

20 files changed

+154
-90
lines changed

lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -482,7 +482,7 @@ bool DYLDRendezvous::RemoveSOEntriesFromRemote(
482482

483483
// Only add shared libraries and not the executable.
484484
if (!SOEntryIsMainExecutable(entry)) {
485-
auto pos = std::find(m_soentries.begin(), m_soentries.end(), entry);
485+
auto pos = llvm::find(m_soentries, entry);
486486
if (pos == m_soentries.end())
487487
return false;
488488

lldb/source/Target/TargetList.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -370,7 +370,7 @@ Status TargetList::CreateTargetInternal(Debugger &debugger,
370370

371371
bool TargetList::DeleteTarget(TargetSP &target_sp) {
372372
std::lock_guard<std::recursive_mutex> guard(m_target_list_mutex);
373-
auto it = std::find(m_target_list.begin(), m_target_list.end(), target_sp);
373+
auto it = llvm::find(m_target_list, target_sp);
374374
if (it == m_target_list.end())
375375
return false;
376376

@@ -506,7 +506,7 @@ lldb::TargetSP TargetList::GetTargetAtIndex(uint32_t idx) const {
506506

507507
uint32_t TargetList::GetIndexOfTarget(lldb::TargetSP target_sp) const {
508508
std::lock_guard<std::recursive_mutex> guard(m_target_list_mutex);
509-
auto it = std::find(m_target_list.begin(), m_target_list.end(), target_sp);
509+
auto it = llvm::find(m_target_list, target_sp);
510510
if (it != m_target_list.end())
511511
return std::distance(m_target_list.begin(), it);
512512
return UINT32_MAX;
@@ -533,7 +533,7 @@ void TargetList::SetSelectedTarget(uint32_t index) {
533533

534534
void TargetList::SetSelectedTarget(const TargetSP &target_sp) {
535535
std::lock_guard<std::recursive_mutex> guard(m_target_list_mutex);
536-
auto it = std::find(m_target_list.begin(), m_target_list.end(), target_sp);
536+
auto it = llvm::find(m_target_list, target_sp);
537537
SetSelectedTargetInternal(std::distance(m_target_list.begin(), it));
538538
}
539539

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 475741
19+
#define LLVM_MAIN_REVISION 475744
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/include/llvm/Support/SwapByteOrder.h

Lines changed: 2 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -73,23 +73,11 @@ inline unsigned long long getSwappedBytes(unsigned long long C) { return llvm::b
7373
inline signed long long getSwappedBytes( signed long long C) { return llvm::byteswap(C); }
7474

7575
inline float getSwappedBytes(float C) {
76-
union {
77-
uint32_t i;
78-
float f;
79-
} in, out;
80-
in.f = C;
81-
out.i = llvm::byteswap(in.i);
82-
return out.f;
76+
return llvm::bit_cast<float>(llvm::byteswap(llvm::bit_cast<uint32_t>(C)));
8377
}
8478

8579
inline double getSwappedBytes(double C) {
86-
union {
87-
uint64_t i;
88-
double d;
89-
} in, out;
90-
in.d = C;
91-
out.i = llvm::byteswap(in.i);
92-
return out.d;
80+
return llvm::bit_cast<double>(llvm::byteswap(llvm::bit_cast<uint64_t>(C)));
9381
}
9482

9583
template <typename T>

llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -194,9 +194,7 @@ Error ExecutorSharedMemoryMapperService::deinitialize(
194194

195195
// Remove the allocation from the allocation list of its reservation
196196
for (auto &Reservation : Reservations) {
197-
auto AllocationIt =
198-
std::find(Reservation.second.Allocations.begin(),
199-
Reservation.second.Allocations.end(), Base);
197+
auto AllocationIt = llvm::find(Reservation.second.Allocations, Base);
200198
if (AllocationIt != Reservation.second.Allocations.end()) {
201199
Reservation.second.Allocations.erase(AllocationIt);
202200
break;

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1692,6 +1692,15 @@ def HasTrue16BitInsts : Predicate<"Subtarget->hasTrue16BitInsts()">,
16921692
AssemblerPredicate<(all_of FeatureTrue16BitInsts)>;
16931693
def NotHasTrue16BitInsts : Predicate<"!Subtarget->hasTrue16BitInsts()">;
16941694

1695+
// Control use of True16 instructions. The real True16 instructions are
1696+
// True16 instructions as they are defined in the ISA. Fake True16
1697+
// instructions have the same encoding as real ones but syntactically
1698+
// only allow 32-bit registers in operands and use low halves thereof.
1699+
def UseRealTrue16Insts : Predicate<"Subtarget->useRealTrue16Insts()">,
1700+
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts)>;
1701+
def UseFakeTrue16Insts : Predicate<"Subtarget->hasTrue16BitInsts() && "
1702+
"!Subtarget->useRealTrue16Insts()">;
1703+
16951704
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
16961705
AssemblerPredicate<(all_of FeatureVOP3P)>;
16971706

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -420,11 +420,14 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
420420
// encodings
421421
if (isGFX11Plus() && Bytes.size() >= 12 ) {
422422
DecoderUInt128 DecW = eat12Bytes(Bytes);
423-
Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address, CS);
423+
Res =
424+
tryDecodeInst(DecoderTableDPP8GFX1196, DecoderTableDPP8GFX11_FAKE1696,
425+
MI, DecW, Address, CS);
424426
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
425427
break;
426428
MI = MCInst(); // clear
427-
Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address, CS);
429+
Res = tryDecodeInst(DecoderTableDPPGFX1196, DecoderTableDPPGFX11_FAKE1696,
430+
MI, DecW, Address, CS);
428431
if (Res) {
429432
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
430433
convertVOP3PDPPInst(MI);
@@ -463,15 +466,17 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
463466
break;
464467
MI = MCInst(); // clear
465468

466-
Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address, CS);
469+
Res = tryDecodeInst(DecoderTableDPP8GFX1164,
470+
DecoderTableDPP8GFX11_FAKE1664, MI, QW, Address, CS);
467471
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
468472
break;
469473
MI = MCInst(); // clear
470474

471475
Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
472476
if (Res) break;
473477

474-
Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address, CS);
478+
Res = tryDecodeInst(DecoderTableDPPGFX1164, DecoderTableDPPGFX11_FAKE1664,
479+
MI, QW, Address, CS);
475480
if (Res) {
476481
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
477482
convertVOPCDPPInst(MI);
@@ -532,7 +537,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
532537
Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
533538
if (Res) break;
534539

535-
Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS);
540+
Res = tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
541+
Address, CS);
536542
if (Res) break;
537543

538544
if (Bytes.size() < 4) break;
@@ -562,7 +568,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
562568
Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
563569
if (Res) break;
564570

565-
Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS);
571+
Res = tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
572+
Address, CS);
566573
if (Res)
567574
break;
568575

llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,17 @@ class AMDGPUDisassembler : public MCDisassembler {
144144
return MCDisassembler::Fail;
145145
}
146146

147+
template <typename InsnType>
148+
DecodeStatus tryDecodeInst(const uint8_t *Table1, const uint8_t *Table2,
149+
MCInst &MI, InsnType Inst, uint64_t Address,
150+
raw_ostream &Comments) const {
151+
for (const uint8_t *T : {Table1, Table2}) {
152+
if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
153+
return Res;
154+
}
155+
return MCDisassembler::Fail;
156+
}
157+
147158
std::optional<DecodeStatus>
148159
onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef<uint8_t> Bytes,
149160
uint64_t Address, raw_ostream &CStream) const override;

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1408,6 +1408,7 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
14081408
case AMDGPU::V_MAX_F32_e64:
14091409
case AMDGPU::V_MAX_F16_e64:
14101410
case AMDGPU::V_MAX_F16_t16_e64:
1411+
case AMDGPU::V_MAX_F16_fake16_e64:
14111412
case AMDGPU::V_MAX_F64_e64:
14121413
case AMDGPU::V_PK_MAX_F16: {
14131414
if (!TII->getNamedOperand(MI, AMDGPU::OpName::clamp)->getImm())
@@ -1503,7 +1504,8 @@ static int getOModValue(unsigned Opc, int64_t Val) {
15031504
}
15041505
}
15051506
case AMDGPU::V_MUL_F16_e64:
1506-
case AMDGPU::V_MUL_F16_t16_e64: {
1507+
case AMDGPU::V_MUL_F16_t16_e64:
1508+
case AMDGPU::V_MUL_F16_fake16_e64: {
15071509
switch (static_cast<uint16_t>(Val)) {
15081510
case 0x3800: // 0.5
15091511
return SIOutMods::DIV2;
@@ -1530,12 +1532,14 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
15301532
case AMDGPU::V_MUL_F64_e64:
15311533
case AMDGPU::V_MUL_F32_e64:
15321534
case AMDGPU::V_MUL_F16_t16_e64:
1535+
case AMDGPU::V_MUL_F16_fake16_e64:
15331536
case AMDGPU::V_MUL_F16_e64: {
15341537
// If output denormals are enabled, omod is ignored.
15351538
if ((Op == AMDGPU::V_MUL_F32_e64 &&
15361539
MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) ||
15371540
((Op == AMDGPU::V_MUL_F64_e64 || Op == AMDGPU::V_MUL_F16_e64 ||
1538-
Op == AMDGPU::V_MUL_F16_t16_e64) &&
1541+
Op == AMDGPU::V_MUL_F16_t16_e64 ||
1542+
Op == AMDGPU::V_MUL_F16_fake16_e64) &&
15391543
MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign))
15401544
return std::pair(nullptr, SIOutMods::NONE);
15411545

@@ -1565,12 +1569,14 @@ SIFoldOperands::isOMod(const MachineInstr &MI) const {
15651569
case AMDGPU::V_ADD_F64_e64:
15661570
case AMDGPU::V_ADD_F32_e64:
15671571
case AMDGPU::V_ADD_F16_e64:
1568-
case AMDGPU::V_ADD_F16_t16_e64: {
1572+
case AMDGPU::V_ADD_F16_t16_e64:
1573+
case AMDGPU::V_ADD_F16_fake16_e64: {
15691574
// If output denormals are enabled, omod is ignored.
15701575
if ((Op == AMDGPU::V_ADD_F32_e64 &&
15711576
MFI->getMode().FP32Denormals.Output != DenormalMode::PreserveSign) ||
15721577
((Op == AMDGPU::V_ADD_F64_e64 || Op == AMDGPU::V_ADD_F16_e64 ||
1573-
Op == AMDGPU::V_ADD_F16_t16_e64) &&
1578+
Op == AMDGPU::V_ADD_F16_t16_e64 ||
1579+
Op == AMDGPU::V_ADD_F16_fake16_e64) &&
15741580
MFI->getMode().FP64FP16Denormals.Output != DenormalMode::PreserveSign))
15751581
return std::pair(nullptr, SIOutMods::NONE);
15761582

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2262,6 +2262,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
22622262
field list<ValueType> ArgVT = _ArgVT;
22632263
field bit EnableClamp = _EnableClamp;
22642264
field bit IsTrue16 = 0;
2265+
field bit IsRealTrue16 = 0;
22652266

22662267
field ValueType DstVT = ArgVT[0];
22672268
field ValueType Src0VT = ArgVT[1];
@@ -2453,6 +2454,21 @@ class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.
24532454
// VOPC_Class_NoSdst_Profile_t16, and VOP_MAC_F16_t16 do not inherit from this
24542455
// class, so copy changes to this class in those profiles
24552456
class VOPProfile_True16<VOPProfile P> : VOPProfile<P.ArgVT> {
2457+
let IsTrue16 = 1;
2458+
let IsRealTrue16 = 1;
2459+
// Most DstVT are 16-bit, but not all.
2460+
let DstRC = getVALUDstForVT_t16<DstVT>.ret;
2461+
let DstRC64 = getVALUDstForVT<DstVT>.ret;
2462+
let Src1RC32 = RegisterOperand<getVregSrcForVT_t16<Src1VT>.ret>;
2463+
let Src0DPP = getVregSrcForVT_t16<Src0VT>.ret;
2464+
let Src1DPP = getVregSrcForVT_t16<Src1VT>.ret;
2465+
let Src2DPP = getVregSrcForVT_t16<Src2VT>.ret;
2466+
let Src0ModDPP = getSrcModDPP_t16<Src0VT>.ret;
2467+
let Src1ModDPP = getSrcModDPP_t16<Src1VT>.ret;
2468+
let Src2ModDPP = getSrcModDPP_t16<Src2VT>.ret;
2469+
}
2470+
2471+
class VOPProfile_Fake16<VOPProfile P> : VOPProfile<P.ArgVT> {
24562472
let IsTrue16 = 1;
24572473
// Most DstVT are 16-bit, but not all
24582474
let DstRC = getVALUDstForVT_t16<DstVT>.ret;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1673,8 +1673,10 @@ def : ClampPat<V_MAX_F32_e64, f32>;
16731673
def : ClampPat<V_MAX_F64_e64, f64>;
16741674
let SubtargetPredicate = NotHasTrue16BitInsts in
16751675
def : ClampPat<V_MAX_F16_e64, f16>;
1676-
let SubtargetPredicate = HasTrue16BitInsts in
1676+
let SubtargetPredicate = UseRealTrue16Insts in
16771677
def : ClampPat<V_MAX_F16_t16_e64, f16>;
1678+
let SubtargetPredicate = UseFakeTrue16Insts in
1679+
def : ClampPat<V_MAX_F16_fake16_e64, f16>;
16781680

16791681
let SubtargetPredicate = HasVOP3PInsts in {
16801682
def : GCNPat <
@@ -2789,12 +2791,12 @@ def : GCNPat<
27892791
let OtherPredicates = [HasTrue16BitInsts] in {
27902792
def : GCNPat<
27912793
(fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))),
2792-
(V_MUL_F16_t16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src)
2794+
(V_MUL_F16_fake16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src)
27932795
>;
27942796

27952797
def : GCNPat<
27962798
(fcanonicalize (f16 (fneg (VOP3Mods f16:$src, i32:$src_mods)))),
2797-
(V_MUL_F16_t16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src)
2799+
(V_MUL_F16_fake16_e64 0, (i32 CONST.FP16_NEG_ONE), $src_mods, $src)
27982800
>;
27992801
} // End OtherPredicates
28002802

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -152,7 +152,7 @@ multiclass VOP1Inst_t16<string opName,
152152
defm NAME : VOP1Inst<opName, P, node>;
153153
}
154154
let OtherPredicates = [HasTrue16BitInsts] in {
155-
defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>;
155+
defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_Fake16<P>, node>;
156156
}
157157
}
158158

@@ -170,7 +170,7 @@ class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
170170
}
171171

172172
class VOPProfileI2F_True16<ValueType dstVt, ValueType srcVt> :
173-
VOPProfile_True16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
173+
VOPProfile_Fake16<VOPProfile<[dstVt, srcVt, untyped, untyped]>> {
174174

175175
let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
176176
let InsVOP3Base = (ins Src0VOP3DPP:$src0, clampmod:$clamp, omod:$omod);
@@ -199,7 +199,7 @@ class VOP_SPECIAL_OMOD_PROF<ValueType dstVt, ValueType srcVt> :
199199
def VOP_I32_F32_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f32>;
200200
def VOP_I32_F64_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i32, f64>;
201201
def VOP_I16_F16_SPECIAL_OMOD : VOP_SPECIAL_OMOD_PROF<i16, f16>;
202-
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_True16<VOP_I16_F16> {
202+
def VOP_I16_F16_SPECIAL_OMOD_t16 : VOPProfile_Fake16<VOP_I16_F16> {
203203
let HasOMod = 1;
204204
}
205205

@@ -292,13 +292,13 @@ let FPDPRounding = 1, isReMaterializable = 0 in {
292292
let OtherPredicates = [NotHasTrue16BitInsts] in
293293
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
294294
let OtherPredicates = [HasTrue16BitInsts] in
295-
defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>;
295+
defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_Fake16<VOP_F16_F32>, any_fpround>;
296296
} // End FPDPRounding = 1, isReMaterializable = 0
297297

298298
let OtherPredicates = [NotHasTrue16BitInsts] in
299299
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
300300
let OtherPredicates = [HasTrue16BitInsts] in
301-
defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>;
301+
defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_Fake16<VOP_F32_F16>, any_fpextend>;
302302

303303
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
304304
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;

0 commit comments

Comments
 (0)