Skip to content

Commit 267de85

Browse files
author
Rin Dobrescu
authored
[llvm-mca][AArch64] Add AArch64 version of clearsSuperRegisters. (#92548)
This patch overrides the clearsSuperRegisters method defined in MCInstrAnalysis to identify register writes that clear the upper portion of all super-registers on the AArch64 architecture. On AArch64, a write to a general-purpose register of 32-bit data size is defined to use the lower 32 bits of the register and zero-extend the upper 32 bits. Similarly, SIMD and FP instructions operating on scalar data only access the lower bits of the SIMD&FP register; the unused upper bits are cleared to zero on a write. This also applies to SIMD vector registers when the element size in bits multiplied by the number of lanes is less than 128: in that case, the upper 64 bits of the vector register are cleared to zero on a write.
1 parent 7d9634e commit 267de85

File tree

3 files changed

+1652
-0
lines changed

3 files changed

+1652
-0
lines changed

llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,55 @@ class AArch64MCInstrAnalysis : public MCInstrAnalysis {
430430
return false;
431431
}
432432

433+
bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst,
434+
APInt &Mask) const override {
435+
const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
436+
unsigned NumDefs = Desc.getNumDefs();
437+
unsigned NumImplicitDefs = Desc.implicit_defs().size();
438+
assert(Mask.getBitWidth() == NumDefs + NumImplicitDefs &&
439+
"Unexpected number of bits in the mask!");
440+
// 32-bit General Purpose Register class.
441+
const MCRegisterClass &GPR32RC = MRI.getRegClass(AArch64::GPR32RegClassID);
442+
// Floating Point Register classes.
443+
const MCRegisterClass &FPR8RC = MRI.getRegClass(AArch64::FPR8RegClassID);
444+
const MCRegisterClass &FPR16RC = MRI.getRegClass(AArch64::FPR16RegClassID);
445+
const MCRegisterClass &FPR32RC = MRI.getRegClass(AArch64::FPR32RegClassID);
446+
const MCRegisterClass &FPR64RC = MRI.getRegClass(AArch64::FPR64RegClassID);
447+
const MCRegisterClass &FPR128RC =
448+
MRI.getRegClass(AArch64::FPR128RegClassID);
449+
450+
auto ClearsSuperReg = [=](unsigned RegID) {
451+
// An update to the lower 32 bits of a 64 bit integer register is
452+
// architecturally defined to zero extend the upper 32 bits on a write.
453+
if (GPR32RC.contains(RegID))
454+
return true;
455+
// SIMD&FP instructions operating on scalar data only acccess the lower
456+
// bits of a register, the upper bits are zero extended on a write. For
457+
// SIMD vector registers smaller than 128-bits, the upper 64-bits of the
458+
// register are zero extended on a write.
459+
// When VL is higher than 128 bits, any write to a SIMD&FP register sets
460+
// bits higher than 128 to zero.
461+
return FPR8RC.contains(RegID) || FPR16RC.contains(RegID) ||
462+
FPR32RC.contains(RegID) || FPR64RC.contains(RegID) ||
463+
FPR128RC.contains(RegID);
464+
};
465+
466+
Mask.clearAllBits();
467+
for (unsigned I = 0, E = NumDefs; I < E; ++I) {
468+
const MCOperand &Op = Inst.getOperand(I);
469+
if (ClearsSuperReg(Op.getReg()))
470+
Mask.setBit(I);
471+
}
472+
473+
for (unsigned I = 0, E = NumImplicitDefs; I < E; ++I) {
474+
const MCPhysReg Reg = Desc.implicit_defs()[I];
475+
if (ClearsSuperReg(Reg))
476+
Mask.setBit(NumDefs + I);
477+
}
478+
479+
return Mask.getBoolValue();
480+
}
481+
433482
std::vector<std::pair<uint64_t, uint64_t>>
434483
findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
435484
const Triple &TargetTriple) const override {

0 commit comments

Comments
 (0)