Skip to content

Commit d6acac3

Browse files
committed
[X86][CodeGen] Support long instruction fixup for APX NDD instructions
RFC: https://discourse.llvm.org/t/rfc-support-long-instruction-fixup-for-x86/76539
1 parent 924ad19 commit d6acac3

File tree

4 files changed

+324
-11
lines changed

4 files changed

+324
-11
lines changed

llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1315,6 +1315,35 @@ inline bool isKMasked(uint64_t TSFlags) {
13151315
inline bool isKMergeMasked(uint64_t TSFlags) {
13161316
return isKMasked(TSFlags) && (TSFlags & X86II::EVEX_Z) == 0;
13171317
}
1318+
1319+
/// \returns true if the instruction needs a SIB byte.
///
/// \param BaseReg base register of the memory operand (0 if there is none).
/// \param IndexReg index register of the memory operand (0 if there is none).
/// \param In64BitMode whether the instruction is being encoded for 64-bit mode.
1320+
inline bool needSIB(unsigned BaseReg, unsigned IndexReg, bool In64BitMode) {
1321+
// The SIB byte must be used if there is an index register.
1322+
if (IndexReg)
1323+
return true;
1324+
1325+
// The SIB byte must be used if the base is ESP/RSP/R12/R20/R28, all of
1326+
// which encode to an R/M value of 4, which indicates that a SIB byte is
1327+
// present. The 32-bit forms R12D/R20D/R28D are listed alongside the 64-bit
// registers. Any other base falls through to the final check below.
1328+
switch (BaseReg) {
1329+
default:
1330+
break;
1331+
case X86::ESP:
1332+
case X86::RSP:
1333+
case X86::R12:
1334+
case X86::R12D:
1335+
case X86::R20:
1336+
case X86::R20D:
1337+
case X86::R28:
1338+
case X86::R28D:
1339+
return true;
1340+
}
1341+
1342+
// If there is no base register and we're in 64-bit mode, we need a SIB
1343+
// byte to emit an addr that is just 'disp32' (the non-RIP relative form).
1344+
return In64BitMode && !BaseReg;
1345+
}
1346+
13181347
} // namespace X86II
13191348
} // namespace llvm
13201349
#endif

llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -753,17 +753,8 @@ void X86MCCodeEmitter::emitMemModRMByte(
753753
bool AllowDisp8 = !UseDisp32;
754754

755755
// Determine whether a SIB byte is needed.
756-
if ( // The SIB byte must be used if there is an index register or the
757-
// encoding requires a SIB byte.
758-
!ForceSIB && IndexReg.getReg() == 0 &&
759-
// The SIB byte must be used if the base is ESP/RSP/R12/R20/R28, all of
760-
// which encode to an R/M value of 4, which indicates that a SIB byte is
761-
// present.
762-
BaseRegNo != N86::ESP &&
763-
// If there is no base register and we're in 64-bit mode, we need a SIB
764-
// byte to emit an addr that is just 'disp32' (the non-RIP relative form).
765-
(!STI.hasFeature(X86::Is64Bit) || BaseReg != 0)) {
766-
756+
if (!ForceSIB && !X86II::needSIB(BaseReg, IndexReg.getReg(),
757+
STI.hasFeature(X86::Is64Bit))) {
767758
if (BaseReg == 0) { // [disp32] in X86-32 mode
768759
emitByte(modRMByte(0, RegOpcodeField, 5), CB);
769760
emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, StartByte, CB, Fixups);

llvm/lib/Target/X86/X86ExpandPseudo.cpp

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -613,6 +613,87 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
613613
case X86::CALL64m_RVMARKER:
614614
expandCALL_RVMARKER(MBB, MBBI);
615615
return true;
616+
case X86::ADD32mi_ND:
617+
case X86::ADD64mi32_ND:
618+
case X86::SUB32mi_ND:
619+
case X86::SUB64mi32_ND:
620+
case X86::AND32mi_ND:
621+
case X86::AND64mi32_ND:
622+
case X86::OR32mi_ND:
623+
case X86::OR64mi32_ND:
624+
case X86::XOR32mi_ND:
625+
case X86::XOR64mi32_ND:
626+
case X86::ADC32mi_ND:
627+
case X86::ADC64mi32_ND:
628+
case X86::SBB32mi_ND:
629+
case X86::SBB64mi32_ND: {
630+
// It's possible for an EVEX-encoded legacy instruction to reach the 15-byte
631+
// instruction length limit: 4 bytes of EVEX prefix + 1 byte of opcode + 1
632+
// byte of ModRM + 1 byte of SIB + 4 bytes of displacement + 4 bytes of
633+
// immediate = 15 bytes in total, e.g.
634+
//
635+
// addq $184, -96, %rax
636+
//
637+
// In such a case, no additional segment override prefix can be used. To
638+
// resolve the issue, we split the “long” instruction into 2 instructions:
639+
//
640+
// subq $184, %fs:257(%rbx, %rcx), %rax
641+
//
642+
// ->
643+
//
644+
// movq %fs:257(%rbx, %rcx),%rax
645+
// subq $184, %rax
646+
int MemOpNo = X86::getFirstAddrOperandIdx(MI);
647+
Register Segment = MI.getOperand(MemOpNo + X86::AddrSegmentReg).getReg();
648+
if (Segment == X86::NoRegister)
649+
return false;
650+
const MachineOperand &ImmOp =
651+
MI.getOperand(MI.getNumExplicitOperands() - 1);
652+
// If the immediate is an expression, conservatively estimate 4 bytes.
653+
if (ImmOp.isImm() && isInt<8>(ImmOp.getImm()))
654+
return false;
655+
Register Base = MI.getOperand(MemOpNo + X86::AddrBaseReg).getReg();
656+
Register Index = MI.getOperand(MemOpNo + X86::AddrIndexReg).getReg();
657+
if (!X86II::needSIB(Base, Index, /*In64BitMode=*/true))
658+
return false;
659+
const MachineOperand &DispOp = MI.getOperand(MemOpNo + X86::AddrDisp);
660+
// If the displacement is an expression, conservatively estimate 4 bytes.
661+
if (DispOp.isImm() && isInt<8>(DispOp.getImm()))
662+
return false;
663+
unsigned Opc, LoadOpc;
664+
switch (Opcode) {
665+
#define MI_TO_RI(OP) \
666+
case X86::OP##32mi_ND: \
667+
Opc = X86::OP##32ri; \
668+
LoadOpc = X86::MOV32rm; \
669+
break; \
670+
case X86::OP##64mi32_ND: \
671+
Opc = X86::OP##64ri32; \
672+
LoadOpc = X86::MOV64rm; \
673+
break;
674+
675+
default:
676+
llvm_unreachable("Unexpected Opcode");
677+
MI_TO_RI(ADD);
678+
MI_TO_RI(SUB);
679+
MI_TO_RI(AND);
680+
MI_TO_RI(OR);
681+
MI_TO_RI(XOR);
682+
MI_TO_RI(ADC);
683+
MI_TO_RI(SBB);
684+
#undef MI_TO_RI
685+
}
686+
// Insert OPri.
687+
Register DestReg = MI.getOperand(0).getReg();
688+
BuildMI(MBB, std::next(MBBI), DL, TII->get(Opc), DestReg)
689+
.addReg(DestReg)
690+
.add(ImmOp);
691+
// Change OPmi_ND to MOVrm.
692+
for (unsigned I = MI.getNumImplicitOperands() + 1; I != 0; --I)
693+
MI.removeOperand(MI.getNumOperands() - 1);
694+
MI.setDesc(TII->get(LoadOpc));
695+
return true;
696+
}
616697
}
617698
llvm_unreachable("Previous switch has a fallthrough?");
618699
}
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd -verify-machineinstrs | FileCheck %s
3+
4+
define i32 @add32mi_GS() {
5+
; CHECK-LABEL: add32mi_GS:
6+
; CHECK: # %bb.0: # %entry
7+
; CHECK-NEXT: movl %gs:255, %eax
8+
; CHECK-NEXT: addl $123456, %eax # imm = 0x1E240
9+
; CHECK-NEXT: retq
10+
entry:
11+
%a= inttoptr i32 255 to ptr addrspace(256)
12+
%t = load i32, ptr addrspace(256) %a
13+
%add = add i32 %t, 123456
14+
ret i32 %add
15+
}
16+
17+
define i64 @add64mi_FS() {
18+
; CHECK-LABEL: add64mi_FS:
19+
; CHECK: # %bb.0: # %entry
20+
; CHECK-NEXT: movq %fs:255, %rax
21+
; CHECK-NEXT: addq $123456, %rax # imm = 0x1E240
22+
; CHECK-NEXT: retq
23+
entry:
24+
%a= inttoptr i64 255 to ptr addrspace(257)
25+
%t = load i64, ptr addrspace(257) %a
26+
%add = add i64 %t, 123456
27+
ret i64 %add
28+
}
29+
30+
define i32 @sub32mi_GS() {
31+
; CHECK-LABEL: sub32mi_GS:
32+
; CHECK: # %bb.0: # %entry
33+
; CHECK-NEXT: movl %gs:255, %eax
34+
; CHECK-NEXT: addl $129, %eax
35+
; CHECK-NEXT: retq
36+
entry:
37+
%a= inttoptr i32 255 to ptr addrspace(256)
38+
%t = load i32, ptr addrspace(256) %a
39+
%sub = sub i32 %t, -129
40+
ret i32 %sub
41+
}
42+
43+
define i64 @sub64mi_FS() {
44+
; CHECK-LABEL: sub64mi_FS:
45+
; CHECK: # %bb.0: # %entry
46+
; CHECK-NEXT: movq %fs:255, %rax
47+
; CHECK-NEXT: subq $-2147483648, %rax # imm = 0x80000000
48+
; CHECK-NEXT: retq
49+
entry:
50+
%a= inttoptr i64 255 to ptr addrspace(257)
51+
%t = load i64, ptr addrspace(257) %a
52+
%sub = sub i64 %t, -2147483648
53+
ret i64 %sub
54+
}
55+
56+
define i32 @and32mi_GS() {
57+
; CHECK-LABEL: and32mi_GS:
58+
; CHECK: # %bb.0: # %entry
59+
; CHECK-NEXT: movl %gs:255, %eax
60+
; CHECK-NEXT: andl $-129, %eax
61+
; CHECK-NEXT: retq
62+
entry:
63+
%a= inttoptr i32 255 to ptr addrspace(256)
64+
%t = load i32, ptr addrspace(256) %a
65+
%and = and i32 %t, -129
66+
ret i32 %and
67+
}
68+
69+
define i64 @and64mi_FS() {
70+
; CHECK-LABEL: and64mi_FS:
71+
; CHECK: # %bb.0: # %entry
72+
; CHECK-NEXT: movq %fs:255, %rax
73+
; CHECK-NEXT: andq $-2147483648, %rax # imm = 0x80000000
74+
; CHECK-NEXT: retq
75+
entry:
76+
%a= inttoptr i64 255 to ptr addrspace(257)
77+
%t = load i64, ptr addrspace(257) %a
78+
%and = and i64 %t, -2147483648
79+
ret i64 %and
80+
}
81+
82+
define i32 @or32mi_GS() {
83+
; CHECK-LABEL: or32mi_GS:
84+
; CHECK: # %bb.0: # %entry
85+
; CHECK-NEXT: movl %gs:255, %eax
86+
; CHECK-NEXT: orl $-129, %eax
87+
; CHECK-NEXT: retq
88+
entry:
89+
%a= inttoptr i32 255 to ptr addrspace(256)
90+
%t = load i32, ptr addrspace(256) %a
91+
%or = or i32 %t, -129
92+
ret i32 %or
93+
}
94+
95+
define i64 @or64mi_FS() {
96+
; CHECK-LABEL: or64mi_FS:
97+
; CHECK: # %bb.0: # %entry
98+
; CHECK-NEXT: movq %fs:255, %rax
99+
; CHECK-NEXT: orq $-2147483648, %rax # imm = 0x80000000
100+
; CHECK-NEXT: retq
101+
entry:
102+
%a= inttoptr i64 255 to ptr addrspace(257)
103+
%t = load i64, ptr addrspace(257) %a
104+
%or = or i64 %t, -2147483648
105+
ret i64 %or
106+
}
107+
108+
define i32 @xor32mi_GS() {
109+
; CHECK-LABEL: xor32mi_GS:
110+
; CHECK: # %bb.0: # %entry
111+
; CHECK-NEXT: movl %gs:255, %eax
112+
; CHECK-NEXT: xorl $-129, %eax
113+
; CHECK-NEXT: retq
114+
entry:
115+
%a= inttoptr i32 255 to ptr addrspace(256)
116+
%t = load i32, ptr addrspace(256) %a
117+
%xor = xor i32 %t, -129
118+
ret i32 %xor
119+
}
120+
121+
define i64 @xor64mi_FS() {
122+
; CHECK-LABEL: xor64mi_FS:
123+
; CHECK: # %bb.0: # %entry
124+
; CHECK-NEXT: movq %fs:255, %rax
125+
; CHECK-NEXT: xorq $-2147483648, %rax # imm = 0x80000000
126+
; CHECK-NEXT: retq
127+
entry:
128+
%a= inttoptr i64 255 to ptr addrspace(257)
129+
%t = load i64, ptr addrspace(257) %a
130+
%xor = xor i64 %t, -2147483648
131+
ret i64 %xor
132+
}
133+
134+
define i32 @adc32mi_GS(i32 %x, i32 %y) {
135+
; CHECK-LABEL: adc32mi_GS:
136+
; CHECK: # %bb.0: # %entry
137+
; CHECK-NEXT: cmpl %edi, %esi
138+
; CHECK-NEXT: movl %gs:255, %eax
139+
; CHECK-NEXT: adcl $123456, %eax # imm = 0x1E240
140+
; CHECK-NEXT: retq
141+
entry:
142+
%a = inttoptr i32 255 to ptr addrspace(256)
143+
%t = load i32, ptr addrspace(256) %a
144+
%s = add i32 %t, 123456
145+
%k = icmp ugt i32 %x, %y
146+
%z = zext i1 %k to i32
147+
%r = add i32 %s, %z
148+
ret i32 %r
149+
}
150+
151+
define i64 @adc64mi_FS(i64 %x, i64 %y) {
152+
; CHECK-LABEL: adc64mi_FS:
153+
; CHECK: # %bb.0: # %entry
154+
; CHECK-NEXT: cmpq %rdi, %rsi
155+
; CHECK-NEXT: movq %fs:255, %rax
156+
; CHECK-NEXT: adcq $123456, %rax # imm = 0x1E240
157+
; CHECK-NEXT: retq
158+
entry:
159+
%a = inttoptr i64 255 to ptr addrspace(257)
160+
%t = load i64, ptr addrspace(257) %a
161+
%s = add i64 %t, 123456
162+
%k = icmp ugt i64 %x, %y
163+
%z = zext i1 %k to i64
164+
%r = add i64 %s, %z
165+
ret i64 %r
166+
}
167+
168+
define i32 @sbb32mi_GS(i32 %x, i32 %y) {
169+
; CHECK-LABEL: sbb32mi_GS:
170+
; CHECK: # %bb.0: # %entry
171+
; CHECK-NEXT: cmpl %edi, %esi
172+
; CHECK-NEXT: sbbl $0, %gs:255, %eax
173+
; CHECK-NEXT: addl $-123456, %eax # imm = 0xFFFE1DC0
174+
; CHECK-NEXT: retq
175+
entry:
176+
%a= inttoptr i32 255 to ptr addrspace(256)
177+
%t = load i32, ptr addrspace(256) %a
178+
%s = sub i32 %t, 123456
179+
%k = icmp ugt i32 %x, %y
180+
%z = zext i1 %k to i32
181+
%r = sub i32 %s, %z
182+
ret i32 %r
183+
}
184+
185+
define i64 @sbb64mi_FS(i64 %x, i64 %y) {
186+
; CHECK-LABEL: sbb64mi_FS:
187+
; CHECK: # %bb.0: # %entry
188+
; CHECK-NEXT: cmpq %rdi, %rsi
189+
; CHECK-NEXT: sbbq $0, %fs:255, %rax
190+
; CHECK-NEXT: addq $-123456, %rax # imm = 0xFFFE1DC0
191+
; CHECK-NEXT: retq
192+
entry:
193+
%a= inttoptr i64 255 to ptr addrspace(257)
194+
%t = load i64, ptr addrspace(257) %a
195+
%s = sub i64 %t, 123456
196+
%k = icmp ugt i64 %x, %y
197+
%z = zext i1 %k to i64
198+
%r = sub i64 %s, %z
199+
ret i64 %r
200+
}
201+
202+
define i32 @add32mi8_GS() {
203+
; CHECK-LABEL: add32mi8_GS:
204+
; CHECK: # %bb.0: # %entry
205+
; CHECK-NEXT: addl $127, %gs:255, %eax
206+
; CHECK-NEXT: retq
207+
entry:
208+
%a= inttoptr i32 255 to ptr addrspace(256)
209+
%t = load i32, ptr addrspace(256) %a
210+
%add = add i32 %t, 127
211+
ret i32 %add
212+
}

0 commit comments

Comments
 (0)