Skip to content

Commit 37293e6

Browse files
authored
[X86][CodeGen] Support long instruction fixup for APX NDD instructions (#83578)
RFC: https://discourse.llvm.org/t/rfc-support-long-instruction-fixup-for-x86/76539
1 parent c462160 commit 37293e6

File tree

5 files changed

+428
-11
lines changed

5 files changed

+428
-11
lines changed

llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1315,6 +1315,33 @@ inline bool isKMasked(uint64_t TSFlags) {
13151315
inline bool isKMergeMasked(uint64_t TSFlags) {
13161316
return isKMasked(TSFlags) && (TSFlags & X86II::EVEX_Z) == 0;
13171317
}
1318+
1319+
/// Decide whether a memory operand's addressing form requires a SIB byte.
///
/// \param BaseReg     base register of the address; 0 means no base register.
/// \param IndexReg    index register of the address; 0 means no index
///                    register.
/// \param In64BitMode whether the instruction is encoded for 64-bit mode.
/// \returns true if the instruction needs a SIB.
1320+
inline bool needSIB(unsigned BaseReg, unsigned IndexReg, bool In64BitMode) {
1321+
// The SIB byte must be used if there is an index register.
1322+
if (IndexReg)
1323+
return true;
1324+
1325+
// The SIB byte must be used if the base is ESP/RSP/R12/R20/R28 (or the
1326+
// 32-bit R12D/R20D/R28D), all of which encode to an R/M value of 4, which
1327+
// indicates that a SIB byte is present.
1328+
switch (BaseReg) {
1329+
default:
1330+
// If there is no base register and we're in 64-bit mode, we need a SIB
1331+
// byte to emit an addr that is just 'disp32' (the non-RIP relative form).
// Any other base register (or no base outside 64-bit mode) needs no SIB.
1332+
return In64BitMode && !BaseReg;
1333+
case X86::ESP:
1334+
case X86::RSP:
1335+
case X86::R12:
1336+
case X86::R12D:
1337+
case X86::R20:
1338+
case X86::R20D:
1339+
case X86::R28:
1340+
case X86::R28D:
1341+
return true;
1342+
}
1343+
}
1344+
13181345
} // namespace X86II
13191346
} // namespace llvm
13201347
#endif

llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -753,17 +753,8 @@ void X86MCCodeEmitter::emitMemModRMByte(
753753
bool AllowDisp8 = !UseDisp32;
754754

755755
// Determine whether a SIB byte is needed.
756-
if ( // The SIB byte must be used if there is an index register or the
757-
// encoding requires a SIB byte.
758-
!ForceSIB && IndexReg.getReg() == 0 &&
759-
// The SIB byte must be used if the base is ESP/RSP/R12/R20/R28, all of
760-
// which encode to an R/M value of 4, which indicates that a SIB byte is
761-
// present.
762-
BaseRegNo != N86::ESP &&
763-
// If there is no base register and we're in 64-bit mode, we need a SIB
764-
// byte to emit an addr that is just 'disp32' (the non-RIP relative form).
765-
(!STI.hasFeature(X86::Is64Bit) || BaseReg != 0)) {
766-
756+
if (!ForceSIB && !X86II::needSIB(BaseReg, IndexReg.getReg(),
757+
STI.hasFeature(X86::Is64Bit))) {
767758
if (BaseReg == 0) { // [disp32] in X86-32 mode
768759
emitByte(modRMByte(0, RegOpcodeField, 5), CB);
769760
emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, StartByte, CB, Fixups);

llvm/lib/Target/X86/X86ExpandPseudo.cpp

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,91 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
613613
case X86::CALL64m_RVMARKER:
614614
expandCALL_RVMARKER(MBB, MBBI);
615615
return true;
616+
case X86::ADD32mi_ND:
617+
case X86::ADD64mi32_ND:
618+
case X86::SUB32mi_ND:
619+
case X86::SUB64mi32_ND:
620+
case X86::AND32mi_ND:
621+
case X86::AND64mi32_ND:
622+
case X86::OR32mi_ND:
623+
case X86::OR64mi32_ND:
624+
case X86::XOR32mi_ND:
625+
case X86::XOR64mi32_ND:
626+
case X86::ADC32mi_ND:
627+
case X86::ADC64mi32_ND:
628+
case X86::SBB32mi_ND:
629+
case X86::SBB64mi32_ND: {
630+
// It's possible for an EVEX-encoded legacy instruction to reach the 15-byte
631+
// instruction length limit: 4 bytes of EVEX prefix + 1 byte of opcode + 1
632+
// byte of ModRM + 1 byte of SIB + 4 bytes of displacement + 4 bytes of
633+
// immediate = 15 bytes in total, e.g.
634+
//
635+
// subq $184, %fs:257(%rbx, %rcx), %rax
636+
//
637+
// In such a case, no additional (ADSIZE or segment override) prefix can be
638+
// used. To resolve the issue, we split the “long” instruction into 2
639+
// instructions:
640+
//
641+
// movq %fs:257(%rbx, %rcx),%rax
642+
// subq $184, %rax
643+
//
644+
// Therefore we consider the OPmi_ND to be a pseudo instruction to some
645+
// extent.
646+
const MachineOperand &ImmOp =
647+
MI.getOperand(MI.getNumExplicitOperands() - 1);
648+
// If the immediate is an expression, conservatively estimate 4 bytes.
649+
if (ImmOp.isImm() && isInt<8>(ImmOp.getImm()))
650+
return false;
651+
int MemOpNo = X86::getFirstAddrOperandIdx(MI);
652+
const MachineOperand &DispOp = MI.getOperand(MemOpNo + X86::AddrDisp);
653+
Register Base = MI.getOperand(MemOpNo + X86::AddrBaseReg).getReg();
654+
// If the displacement is an expression, conservatively estimate 4 bytes.
655+
if (Base && DispOp.isImm() && isInt<8>(DispOp.getImm()))
656+
return false;
657+
// At most one of these three can be present: SIB, segment override, ADSIZE.
// Count how many this instruction needs; two or more means it must be split.
658+
Register Index = MI.getOperand(MemOpNo + X86::AddrIndexReg).getReg();
659+
unsigned Count = !!MI.getOperand(MemOpNo + X86::AddrSegmentReg).getReg();
660+
if (X86II::needSIB(Base, Index, /*In64BitMode=*/true))
661+
++Count;
662+
if (X86MCRegisterClasses[X86::GR32RegClassID].contains(Base) ||
663+
X86MCRegisterClasses[X86::GR32RegClassID].contains(Index))
664+
++Count;
665+
if (Count < 2)
666+
return false;
667+
unsigned Opc, LoadOpc;
668+
switch (Opcode) {
669+
#define MI_TO_RI(OP) \
670+
case X86::OP##32mi_ND: \
671+
Opc = X86::OP##32ri; \
672+
LoadOpc = X86::MOV32rm; \
673+
break; \
674+
case X86::OP##64mi32_ND: \
675+
Opc = X86::OP##64ri32; \
676+
LoadOpc = X86::MOV64rm; \
677+
break;
678+
679+
default:
680+
llvm_unreachable("Unexpected Opcode");
681+
MI_TO_RI(ADD);
682+
MI_TO_RI(SUB);
683+
MI_TO_RI(AND);
684+
MI_TO_RI(OR);
685+
MI_TO_RI(XOR);
686+
MI_TO_RI(ADC);
687+
MI_TO_RI(SBB);
688+
#undef MI_TO_RI
689+
}
690+
// Insert OPri.
691+
Register DestReg = MI.getOperand(0).getReg();
692+
BuildMI(MBB, std::next(MBBI), DL, TII->get(Opc), DestReg)
693+
.addReg(DestReg)
694+
.add(ImmOp);
695+
// Change OPmi_ND to MOVrm.
696+
for (unsigned I = MI.getNumImplicitOperands() + 1; I != 0; --I)
697+
MI.removeOperand(MI.getNumOperands() - 1);
698+
MI.setDesc(TII->get(LoadOpc));
699+
return true;
700+
}
616701
}
617702
llvm_unreachable("Previous switch has a fallthrough?");
618703
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-pc-linux-gnux32 -mattr=+ndd -verify-machineinstrs | FileCheck %s
3+
4+
5+
define i32 @add32mi_SIB_ADSIZE(ptr nocapture noundef readonly %a, i32 noundef %b) {
6+
; CHECK-LABEL: add32mi_SIB_ADSIZE:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: movl 1164(%edi,%esi,4), %eax
9+
; CHECK-NEXT: addl $4660, %eax # imm = 0x1234
10+
; CHECK-NEXT: retq
11+
entry:
12+
%add.ptr = getelementptr inbounds i32, ptr %a, i32 %b
13+
%add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i32 1164
14+
%0 = load i32, ptr %add.ptr1
15+
%add = add nsw i32 %0, 4660
16+
ret i32 %add
17+
}
18+
19+
declare ptr @llvm.thread.pointer()
20+
21+
define i32 @add32mi_FS_ADSIZE(i32 %i) {
22+
; CHECK-LABEL: add32mi_FS_ADSIZE:
23+
; CHECK: # %bb.0: # %entry
24+
; CHECK-NEXT: movl %fs:0, %eax
25+
; CHECK-NEXT: addl $4660, (%eax,%edi,4), %eax # imm = 0x1234
26+
; CHECK-NEXT: retq
27+
entry:
28+
%0 = tail call ptr @llvm.thread.pointer()
29+
%arrayidx = getelementptr inbounds i32, ptr %0, i32 %i
30+
%1 = load i32, ptr %arrayidx
31+
%add = add nsw i32 %1, 4660
32+
ret i32 %add
33+
}
34+
35+
define i32 @add32mi_FS_SIB(i32 %i) {
36+
; CHECK-LABEL: add32mi_FS_SIB:
37+
; CHECK: # %bb.0: # %entry
38+
; CHECK-NEXT: movl %fs:0, %eax
39+
; CHECK-NEXT: addl $4660, %eax # imm = 0x1234
40+
; CHECK-NEXT: retq
41+
entry:
42+
%0 = tail call ptr @llvm.thread.pointer()
43+
%arrayidx = getelementptr inbounds i32, ptr %0, i32 0
44+
%1 = load i32, ptr %arrayidx
45+
%add = add nsw i32 %1, 4660
46+
ret i32 %add
47+
}
48+
49+
define i32 @add32mi_GS_ADSIZE(ptr addrspace(256) %a) {
50+
; CHECK-LABEL: add32mi_GS_ADSIZE:
51+
; CHECK: # %bb.0: # %entry
52+
; CHECK-NEXT: movl %gs:4936(%edi), %eax
53+
; CHECK-NEXT: addl $123456, %eax # imm = 0x1E240
54+
; CHECK-NEXT: retq
55+
entry:
56+
%arrayidx = getelementptr inbounds i32, ptr addrspace(256) %a, i32 1234
57+
%t = load i32, ptr addrspace(256) %arrayidx
58+
%add = add i32 %t, 123456
59+
ret i32 %add
60+
}

0 commit comments

Comments
 (0)