Skip to content

[X86][CodeGen] Support long instruction fixup for APX NDD instructions #83578

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1315,6 +1315,33 @@ inline bool isKMasked(uint64_t TSFlags) {
inline bool isKMergeMasked(uint64_t TSFlags) {
return isKMasked(TSFlags) && (TSFlags & X86II::EVEX_Z) == 0;
}

/// Decide whether a memory operand has to be encoded with a SIB byte.
///
/// \param BaseReg     base register of the address, or 0 when there is none.
/// \param IndexReg    index register of the address, or 0 when there is none.
/// \param In64BitMode true when the instruction is encoded for 64-bit mode.
/// \returns true if the instruction needs a SIB.
inline bool needSIB(unsigned BaseReg, unsigned IndexReg, bool In64BitMode) {
  // An index register always forces the SIB byte.
  if (IndexReg != 0)
    return true;

  // ESP/RSP/R12/R20/R28 all encode to an R/M value of 4, and that value is
  // what indicates that a SIB byte is present, so these bases always need one.
  const bool BaseEncodesAsRM4 =
      BaseReg == X86::ESP || BaseReg == X86::RSP ||
      BaseReg == X86::R12 || BaseReg == X86::R12D ||
      BaseReg == X86::R20 || BaseReg == X86::R20D ||
      BaseReg == X86::R28 || BaseReg == X86::R28D;
  if (BaseEncodesAsRM4)
    return true;

  // Without a base register in 64-bit mode, a SIB byte is required to emit an
  // address that is just 'disp32' (the non-RIP relative form). Every other
  // combination can do without it.
  return In64BitMode && BaseReg == 0;
}

} // namespace X86II
} // namespace llvm
#endif
13 changes: 2 additions & 11 deletions llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -753,17 +753,8 @@ void X86MCCodeEmitter::emitMemModRMByte(
bool AllowDisp8 = !UseDisp32;

// Determine whether a SIB byte is needed.
if ( // The SIB byte must be used if there is an index register or the
// encoding requires a SIB byte.
!ForceSIB && IndexReg.getReg() == 0 &&
// The SIB byte must be used if the base is ESP/RSP/R12/R20/R28, all of
// which encode to an R/M value of 4, which indicates that a SIB byte is
// present.
BaseRegNo != N86::ESP &&
// If there is no base register and we're in 64-bit mode, we need a SIB
// byte to emit an addr that is just 'disp32' (the non-RIP relative form).
(!STI.hasFeature(X86::Is64Bit) || BaseReg != 0)) {

if (!ForceSIB && !X86II::needSIB(BaseReg, IndexReg.getReg(),
STI.hasFeature(X86::Is64Bit))) {
if (BaseReg == 0) { // [disp32] in X86-32 mode
emitByte(modRMByte(0, RegOpcodeField, 5), CB);
emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, StartByte, CB, Fixups);
Expand Down
85 changes: 85 additions & 0 deletions llvm/lib/Target/X86/X86ExpandPseudo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,91 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::CALL64m_RVMARKER:
expandCALL_RVMARKER(MBB, MBBI);
return true;
// APX NDD "reg = op(mem, imm)" forms. When their encoding could exceed the
// instruction length limit they are split into a load followed by a
// register-immediate op; otherwise they are left untouched (return false).
case X86::ADD32mi_ND:
case X86::ADD64mi32_ND:
case X86::SUB32mi_ND:
case X86::SUB64mi32_ND:
case X86::AND32mi_ND:
case X86::AND64mi32_ND:
case X86::OR32mi_ND:
case X86::OR64mi32_ND:
case X86::XOR32mi_ND:
case X86::XOR64mi32_ND:
case X86::ADC32mi_ND:
case X86::ADC64mi32_ND:
case X86::SBB32mi_ND:
case X86::SBB64mi32_ND: {
// It's possible for an EVEX-encoded legacy instruction to reach the 15-byte
// instruction length limit: 4 bytes of EVEX prefix + 1 byte of opcode + 1
// byte of ModRM + 1 byte of SIB + 4 bytes of displacement + 4 bytes of
// immediate = 15 bytes in total, e.g.
//
// subq $184, %fs:257(%rbx, %rcx), %rax
//
// In such a case, no additional (ADSIZE or segment override) prefix can be
// used. To resolve the issue, we split the "long" instruction into 2
// instructions:
//
// movq %fs:257(%rbx, %rcx),%rax
// subq $184, %rax
//
// Therefore we consider the OPmi_ND to be a pseudo instruction to some
// extent.
//
// The immediate is the last explicit operand.
const MachineOperand &ImmOp =
MI.getOperand(MI.getNumExplicitOperands() - 1);
// An immediate that fits in 8 bits keeps the instruction short enough, so
// nothing needs to be expanded. If the immediate is an expression rather than
// a plain value, conservatively estimate 4 bytes and keep checking.
if (ImmOp.isImm() && isInt<8>(ImmOp.getImm()))
return false;
int MemOpNo = X86::getFirstAddrOperandIdx(MI);
const MachineOperand &DispOp = MI.getOperand(MemOpNo + X86::AddrDisp);
Register Base = MI.getOperand(MemOpNo + X86::AddrBaseReg).getReg();
// Likewise, an 8-bit displacement keeps the instruction short enough. If the
// displacement is an expression, conservatively estimate 4 bytes. The early
// exit is only taken when there is a base register (presumably because a
// base-less address is always emitted with a full disp32 -- TODO confirm).
if (Base && DispOp.isImm() && isInt<8>(DispOp.getImm()))
return false;
// There can only be one of three: SIB, segment override register, ADSIZE.
// Count how many of them this instruction would need; two or more means the
// encoding no longer fits and the instruction has to be split.
Register Index = MI.getOperand(MemOpNo + X86::AddrIndexReg).getReg();
// Starts at 1 if a segment register is present, 0 otherwise.
unsigned Count = !!MI.getOperand(MemOpNo + X86::AddrSegmentReg).getReg();
if (X86II::needSIB(Base, Index, /*In64BitMode=*/true))
++Count;
// A 32-bit base or index register is what is counted as ADSIZE here.
if (X86MCRegisterClasses[X86::GR32RegClassID].contains(Base) ||
X86MCRegisterClasses[X86::GR32RegClassID].contains(Index))
++Count;
if (Count < 2)
return false;
// Map the OPmi_ND opcode to the matching register-immediate opcode (Opc) and
// to the load opcode of the same width (LoadOpc). MI_TO_RI expands to the two
// case labels (32-bit and 64-bit) for one operation; the `default` label is
// simply listed before the macro-generated cases of the same switch.
unsigned Opc, LoadOpc;
switch (Opcode) {
#define MI_TO_RI(OP) \
case X86::OP##32mi_ND: \
Opc = X86::OP##32ri; \
LoadOpc = X86::MOV32rm; \
break; \
case X86::OP##64mi32_ND: \
Opc = X86::OP##64ri32; \
LoadOpc = X86::MOV64rm; \
break;

default:
llvm_unreachable("Unexpected Opcode");
MI_TO_RI(ADD);
MI_TO_RI(SUB);
MI_TO_RI(AND);
MI_TO_RI(OR);
MI_TO_RI(XOR);
MI_TO_RI(ADC);
MI_TO_RI(SBB);
#undef MI_TO_RI
}
// Insert OPri right after the current instruction; it reads and writes the
// original destination register and reuses the original immediate operand.
Register DestReg = MI.getOperand(0).getReg();
BuildMI(MBB, std::next(MBBI), DL, TII->get(Opc), DestReg)
.addReg(DestReg)
.add(ImmOp);
// Change OPmi_ND to MOVrm: strip the trailing operands (as many as there are
// implicit operands, plus one more -- presumably the immediate, assuming the
// implicit operands follow the explicit ones) and then swap in the load
// descriptor, leaving the destination and the memory operands in place.
for (unsigned I = MI.getNumImplicitOperands() + 1; I != 0; --I)
MI.removeOperand(MI.getNumOperands() - 1);
MI.setDesc(TII->get(LoadOpc));
return true;
}
}
llvm_unreachable("Previous switch has a fallthrough?");
}
Expand Down
60 changes: 60 additions & 0 deletions llvm/test/CodeGen/X86/apx/long-instruction-fixup-x32.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 -mattr=+ndd -verify-machineinstrs | FileCheck %s


; Indexed address (%a + %b * 4 + 1164) with an immediate (4660) and a
; displacement (1164) that both exceed 8 bits. The CHECK lines expect the
; load and the add to be emitted as two separate instructions.
define i32 @add32mi_SIB_ADSIZE(ptr nocapture noundef readonly %a, i32 noundef %b) {
; CHECK-LABEL: add32mi_SIB_ADSIZE:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl 1164(%edi,%esi,4), %eax
; CHECK-NEXT: addl $4660, %eax # imm = 0x1234
; CHECK-NEXT: retq
entry:
%add.ptr = getelementptr inbounds i32, ptr %a, i32 %b
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i32 1164
%0 = load i32, ptr %add.ptr1
%add = add nsw i32 %0, 4660
ret i32 %add
}

declare ptr @llvm.thread.pointer()

; Indexes off the thread pointer. The CHECK lines expect the thread pointer
; to be loaded from %fs:0 first, after which the add keeps its memory operand
; (%eax,%edi,4) in the three-operand form, i.e. it is not split.
define i32 @add32mi_FS_ADSIZE(i32 %i) {
; CHECK-LABEL: add32mi_FS_ADSIZE:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %fs:0, %eax
; CHECK-NEXT: addl $4660, (%eax,%edi,4), %eax # imm = 0x1234
; CHECK-NEXT: retq
entry:
%0 = tail call ptr @llvm.thread.pointer()
%arrayidx = getelementptr inbounds i32, ptr %0, i32 %i
%1 = load i32, ptr %arrayidx
%add = add nsw i32 %1, 4660
ret i32 %add
}

; Loads directly through the thread pointer (GEP index 0) and adds a 32-bit
; immediate. The CHECK lines expect a load from %fs:0 followed by a separate
; register-immediate add.
define i32 @add32mi_FS_SIB(i32 %i) {
; CHECK-LABEL: add32mi_FS_SIB:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %fs:0, %eax
; CHECK-NEXT: addl $4660, %eax # imm = 0x1234
; CHECK-NEXT: retq
entry:
%0 = tail call ptr @llvm.thread.pointer()
%arrayidx = getelementptr inbounds i32, ptr %0, i32 0
%1 = load i32, ptr %arrayidx
%add = add nsw i32 %1, 4660
ret i32 %add
}

; Loads through an addrspace(256) pointer, which shows up as a %gs-relative
; address in the expected output, at element 1234 (byte displacement 4936)
; and adds the immediate 123456. The CHECK lines expect the load and the add
; to be emitted as two separate instructions.
define i32 @add32mi_GS_ADSIZE(ptr addrspace(256) %a) {
; CHECK-LABEL: add32mi_GS_ADSIZE:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl %gs:4936(%edi), %eax
; CHECK-NEXT: addl $123456, %eax # imm = 0x1E240
; CHECK-NEXT: retq
entry:
%arrayidx = getelementptr inbounds i32, ptr addrspace(256) %a, i32 1234
%t = load i32, ptr addrspace(256) %arrayidx
%add = add i32 %t, 123456
ret i32 %add
}
Loading