-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86][RA] Add two address hints for compressible NDD instructions. #98603
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Here's the patch influence on cpu2017 benchmarks, it can reduce the text section of the binaries by 0.44% on average:
|
@llvm/pr-subscribers-backend-x86 Author: Freddy Ye (FreddyLeaf) ChangesPatch is 130.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98603.diff 11 Files Affected:
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 555ede9e95403..5777bd0496881 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -45,6 +45,12 @@ static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
+static cl::opt<bool>
+ DisableRegAllocHints("x86-disable-regalloc-hints", cl::Hidden,
+ cl::init(false),
+ cl::desc("Disable two address hints for register "
+ "allocation"));
+
X86RegisterInfo::X86RegisterInfo(const Triple &TT)
: X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
X86_MC::getDwarfRegFlavour(TT, false),
@@ -1082,6 +1088,42 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
VirtReg, Order, Hints, MF, VRM, Matrix);
unsigned ID = RC.getID();
+
+ if (!VRM || DisableRegAllocHints)
+ return BaseImplRetVal;
+
+ // Add any two address hints after any copy hints.
+ SmallSet<unsigned, 4> TwoAddrHints;
+
+ auto tryAddHint = [&](const MachineOperand &VRRegMO,
+ const MachineOperand &MO) -> void {
+ Register Reg = MO.getReg();
+ Register PhysReg =
+ Register::isPhysicalRegister(Reg) ? Reg : Register(VRM->getPhys(Reg));
+ if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
+ TwoAddrHints.insert(PhysReg);
+ };
+
+ for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
+ const MachineInstr &MI = *MO.getParent();
+ if (X86::getNonNDVariant(MI.getOpcode())) {
+ unsigned OpIdx = MI.getOperandNo(&MO);
+ if (OpIdx == 0 && MI.getOperand(1).isReg()) {
+ tryAddHint(MO, MI.getOperand(1));
+ if (MI.isCommutable() && MI.getOperand(2).isReg())
+ tryAddHint(MO, MI.getOperand(2));
+ } else if (OpIdx == 1) {
+ tryAddHint(MO, MI.getOperand(0));
+ } else if (MI.isCommutable() && OpIdx == 2) {
+ tryAddHint(MO, MI.getOperand(0));
+ }
+ }
+ }
+
+ for (MCPhysReg OrderReg : Order)
+ if (TwoAddrHints.count(OrderReg))
+ Hints.push_back(OrderReg);
+
if (ID != X86::TILERegClassID)
return BaseImplRetVal;
diff --git a/llvm/test/CodeGen/X86/apx/and.ll b/llvm/test/CodeGen/X86/apx/and.ll
index 51858ad591605..23aed77b948b9 100644
--- a/llvm/test/CodeGen/X86/apx/and.ll
+++ b/llvm/test/CodeGen/X86/apx/and.ll
@@ -482,17 +482,17 @@ define i1 @andflag16rr(i16 %a, i16 %b) {
define i1 @andflag32rr(i32 %a, i32 %b) {
; CHECK-LABEL: andflag32rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x21,0xf7]
+; CHECK-NEXT: andl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x21,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag32rr:
; NF: # %bb.0:
-; NF-NEXT: andl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x21,0xf7]
+; NF-NEXT: andl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x21,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i32 %a, %b ; 0xff << 50
@@ -504,17 +504,17 @@ define i1 @andflag32rr(i32 %a, i32 %b) {
define i1 @andflag64rr(i64 %a, i64 %b) {
; CHECK-LABEL: andflag64rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: andq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x21,0xf7]
+; CHECK-NEXT: andq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x21,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag64rr:
; NF: # %bb.0:
-; NF-NEXT: andq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x21,0xf7]
+; NF-NEXT: andq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x21,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, %b ; 0xff << 50
@@ -578,17 +578,17 @@ define i1 @andflag16rm(ptr %ptr, i16 %b) {
define i1 @andflag32rm(ptr %ptr, i32 %b) {
; CHECK-LABEL: andflag32rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x23,0x37]
+; CHECK-NEXT: andl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x23,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag32rm:
; NF: # %bb.0:
-; NF-NEXT: andl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x23,0x37]
+; NF-NEXT: andl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x23,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i32, ptr %ptr
@@ -601,17 +601,17 @@ define i1 @andflag32rm(ptr %ptr, i32 %b) {
define i1 @andflag64rm(ptr %ptr, i64 %b) {
; CHECK-LABEL: andflag64rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: andq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x23,0x37]
+; CHECK-NEXT: andq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x23,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag64rm:
; NF: # %bb.0:
-; NF-NEXT: andq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x23,0x37]
+; NF-NEXT: andq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x23,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i64, ptr %ptr
@@ -672,19 +672,19 @@ define i1 @andflag16ri(i16 %a) {
define i1 @andflag32ri(i32 %a) {
; CHECK-LABEL: andflag32ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: andl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xe7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag32ri:
; NF: # %bb.0:
-; NF-NEXT: andl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00]
+; NF-NEXT: andl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xe7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i32 %a, 123456 ; 0xff << 50
@@ -696,19 +696,19 @@ define i1 @andflag32ri(i32 %a) {
define i1 @andflag64ri(i64 %a) {
; CHECK-LABEL: andflag64ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: andq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: andq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xe7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag64ri:
; NF: # %bb.0:
-; NF-NEXT: andq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00]
+; NF-NEXT: andq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xe7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 123456 ; 0xff << 50
@@ -743,17 +743,17 @@ define i1 @andflag16ri8(i16 %a) {
define i1 @andflag32ri8(i32 %a) {
; CHECK-LABEL: andflag32ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xe7,0x7b]
+; CHECK-NEXT: andl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xe7,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag32ri8:
; NF: # %bb.0:
-; NF-NEXT: andl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xe7,0x7b]
+; NF-NEXT: andl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xe7,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i32 %a, 123 ; 0xff << 50
@@ -765,17 +765,17 @@ define i1 @andflag32ri8(i32 %a) {
define i1 @andflag64ri8(i64 %a) {
; CHECK-LABEL: andflag64ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xe7,0x7b]
+; CHECK-NEXT: andq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xe7,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag64ri8:
; NF: # %bb.0:
-; NF-NEXT: andq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xe7,0x7b]
+; NF-NEXT: andq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xe7,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 123 ; 0xff << 50
diff --git a/llvm/test/CodeGen/X86/apx/cmov.ll b/llvm/test/CodeGen/X86/apx/cmov.ll
index 7a6a63f813c0a..7b846120d3f72 100644
--- a/llvm/test/CodeGen/X86/apx/cmov.ll
+++ b/llvm/test/CodeGen/X86/apx/cmov.ll
@@ -5,10 +5,10 @@ define i8 @cmov8(i8 %a, i8 %b, i8 %x, ptr %y.ptr) {
; CHECK-LABEL: cmov8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpb %sil, %dil # encoding: [0x40,0x38,0xf7]
-; CHECK-NEXT: cmoval %edi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x47,0xd7]
-; CHECK-NEXT: movzbl (%rcx), %ecx # encoding: [0x0f,0xb6,0x09]
-; CHECK-NEXT: cmovbel %edx, %ecx # EVEX TO LEGACY Compression encoding: [0x0f,0x46,0xca]
-; CHECK-NEXT: addb %cl, %al # EVEX TO LEGACY Compression encoding: [0x00,0xc8]
+; CHECK-NEXT: cmovbel %edx, %edi # EVEX TO LEGACY Compression encoding: [0x0f,0x46,0xfa]
+; CHECK-NEXT: movzbl (%rcx), %eax # encoding: [0x0f,0xb6,0x01]
+; CHECK-NEXT: cmovbel %edx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x46,0xc2]
+; CHECK-NEXT: addb %dil, %al # EVEX TO LEGACY Compression encoding: [0x40,0x00,0xf8]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%cond = icmp ugt i8 %a, %b
@@ -23,9 +23,9 @@ define i16 @cmov16(i16 %a, i16 %b, i16 %x, ptr %y.ptr) {
; CHECK-LABEL: cmov16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpw %si, %di # encoding: [0x66,0x39,0xf7]
-; CHECK-NEXT: cmoval %edi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x47,0xd7]
-; CHECK-NEXT: cmovaw (%rcx), %dx, %cx # encoding: [0x62,0xf4,0x75,0x18,0x47,0x11]
-; CHECK-NEXT: addw %cx, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x01,0xc8]
+; CHECK-NEXT: cmovbel %edx, %edi # EVEX TO LEGACY Compression encoding: [0x0f,0x46,0xfa]
+; CHECK-NEXT: cmovaw (%rcx), %dx, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x47,0x11]
+; CHECK-NEXT: addw %di, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x01,0xf8]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%cond = icmp ugt i16 %a, %b
@@ -41,8 +41,8 @@ define i32 @cmov32(i32 %a, i32 %b, i32 %x, ptr %y.ptr) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7]
; CHECK-NEXT: cmoval %edi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x47,0xd7]
-; CHECK-NEXT: cmoval (%rcx), %edx, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x47,0x11]
-; CHECK-NEXT: addl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xc8]
+; CHECK-NEXT: cmoval (%rcx), %edx # EVEX TO LEGACY Compression encoding: [0x0f,0x47,0x11]
+; CHECK-NEXT: addl %edx, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xd0]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%cond = icmp ugt i32 %a, %b
@@ -58,8 +58,8 @@ define i64 @cmov64(i64 %a, i64 %b, i64 %x, ptr %y.ptr) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpq %rsi, %rdi # encoding: [0x48,0x39,0xf7]
; CHECK-NEXT: cmovaq %rdi, %rdx, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x47,0xd7]
-; CHECK-NEXT: cmovaq (%rcx), %rdx, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x47,0x11]
-; CHECK-NEXT: addq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xc8]
+; CHECK-NEXT: cmovaq (%rcx), %rdx # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x47,0x11]
+; CHECK-NEXT: addq %rdx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xd0]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%cond = icmp ugt i64 %a, %b
diff --git a/llvm/test/CodeGen/X86/apx/mul-i1024.ll b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
index 2b99c44fc769a..a4d15a1b21d6b 100644
--- a/llvm/test/CodeGen/X86/apx/mul-i1024.ll
+++ b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
@@ -1041,41 +1041,41 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: pushq %r13
; EGPR-NDD-NEXT: pushq %r12
; EGPR-NDD-NEXT: pushq %rbx
-; EGPR-NDD-NEXT: subq $104, %rsp
+; EGPR-NDD-NEXT: subq $96, %rsp
; EGPR-NDD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %rsi, %r15
; EGPR-NDD-NEXT: movq %rdi, %r20
-; EGPR-NDD-NEXT: movq (%rdi), %r16
-; EGPR-NDD-NEXT: movq 8(%rdi), %r14
+; EGPR-NDD-NEXT: movq (%rdi), %r17
+; EGPR-NDD-NEXT: movq 8(%rdi), %r11
; EGPR-NDD-NEXT: movq 24(%rdi), %r9
; EGPR-NDD-NEXT: movq 16(%rdi), %r10
; EGPR-NDD-NEXT: movq 40(%rdi), %rdi
-; EGPR-NDD-NEXT: movq 32(%r20), %r11
-; EGPR-NDD-NEXT: movq 56(%r20), %r17
-; EGPR-NDD-NEXT: movq 48(%r20), %r15
-; EGPR-NDD-NEXT: movq 24(%rsi), %r18
+; EGPR-NDD-NEXT: movq 32(%r20), %r16
+; EGPR-NDD-NEXT: movq 56(%r20), %r18
+; EGPR-NDD-NEXT: movq 48(%r20), %r23
+; EGPR-NDD-NEXT: movq 24(%rsi), %r14
; EGPR-NDD-NEXT: movq 16(%rsi), %r24
; EGPR-NDD-NEXT: movq (%rsi), %r22
; EGPR-NDD-NEXT: movq 8(%rsi), %r21
-; EGPR-NDD-NEXT: movq %rsi, %r23
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: movq %rdx, %r25
; EGPR-NDD-NEXT: movq %rax, %r19
-; EGPR-NDD-NEXT: movq %r17, %rax
+; EGPR-NDD-NEXT: movq %r18, %rax
; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: addq %r25, %rax, %rcx
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %rcx, %rax, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %rsi, %rcx
+; EGPR-NDD-NEXT: addq %r25, %rax, %rsi
+; EGPR-NDD-NEXT: adcq %rdx, %rcx
; EGPR-NDD-NEXT: setb %al
-; EGPR-NDD-NEXT: movzbl %al, %esi
-; EGPR-NDD-NEXT: movq %r17, %rax
+; EGPR-NDD-NEXT: movzbl %al, %r8d
+; EGPR-NDD-NEXT: movq %r18, %rax
; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %rcx, %rax, %r27
-; EGPR-NDD-NEXT: adcq %rdx, %rsi
-; EGPR-NDD-NEXT: movq %r11, %rax
+; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: movq %rdx, %r26
; EGPR-NDD-NEXT: movq %rax, %r25
@@ -1083,7 +1083,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: addq %r26, %rax, %rcx
; EGPR-NDD-NEXT: adcq $0, %rdx, %r26
-; EGPR-NDD-NEXT: movq %r11, %rax
+; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %rax, %rcx
; EGPR-NDD-NEXT: adcq %rdx, %r26
@@ -1094,58 +1094,59 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %r26, %rax
; EGPR-NDD-NEXT: adcq %r28, %rdx
; EGPR-NDD-NEXT: addq %rax, %r19, %r28
-; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: adcq %rdx, %rsi, %r29
; EGPR-NDD-NEXT: adcq $0, %r27
-; EGPR-NDD-NEXT: adcq $0, %rsi, %r29
-; EGPR-NDD-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq %r11, %rax
+; EGPR-NDD-NEXT: adcq $0, %r8
+; EGPR-NDD-NEXT: movq %r16, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: movq %rdx, %r19
; EGPR-NDD-NEXT: movq %rax, %r26
; EGPR-NDD-NEXT: movq %rdi, %rax
; EGPR-NDD-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: mulq %r24
-; EGPR-NDD-NEXT: addq %r19, %rax, %rsi
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r19
-; EGPR-NDD-NEXT: movq %r11, %rax
-; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %rsi, %rax, %r30
-; EGPR-NDD-NEXT: adcq %rdx, %r19, %rsi
+; EGPR-NDD-NEXT: addq %rax, %r19
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
+; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: mulq %r14
+; EGPR-NDD-NEXT: addq %rax, %r19
+; EGPR-NDD-NEXT: adcq %rdx, %rsi
; EGPR-NDD-NEXT: setb %al
-; EGPR-NDD-NEXT: movzbl %al, %r19d
+; EGPR-NDD-NEXT: movzbl %al, %r30d
; EGPR-NDD-NEXT: movq %rdi, %rax
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: ...
[truncated]
|
And |
@FreddyLeaf You can use markdown syntax for table |
This patch is to address the comment here: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/5?u=freddyye |
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM w/ one comment
To address @topperc 's comment at https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/5?u=kanrobert