Skip to content

Commit d1c6186

Browse files
committed
[X86] Don't emit MULX by default with BMI2
MULX has somewhat improved register allocation constraints compared to the legacy MUL instruction. Both output registers are encoded instead of fixed to EAX/EDX, but EDX is used as input. It also doesn't touch flags. Unfortunately, the encoding is longer. Prefering it whenever BMI2 is enabled is probably not optimal. Choosing it should somehow be a function of register allocation constraints like converting adds to three address. gcc and icc definitely don't pick MULX by default. Not sure what if any rules they have for using it. Differential Revision: https://reviews.llvm.org/D55565 llvm-svn: 348975
1 parent 50c9bf4 commit d1c6186

File tree

7 files changed

+217
-407
lines changed

7 files changed

+217
-407
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 17 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -3410,14 +3410,11 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
34103410

34113411
unsigned Opc, MOpc;
34123412
bool isSigned = Opcode == ISD::SMUL_LOHI;
3413-
bool hasBMI2 = Subtarget->hasBMI2();
34143413
if (!isSigned) {
34153414
switch (NVT.SimpleTy) {
34163415
default: llvm_unreachable("Unsupported VT!");
3417-
case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
3418-
MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
3419-
case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
3420-
MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
3416+
case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
3417+
case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
34213418
}
34223419
} else {
34233420
switch (NVT.SimpleTy) {
@@ -3438,12 +3435,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
34383435
case X86::MUL64r:
34393436
SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
34403437
break;
3441-
case X86::MULX32rr:
3442-
SrcReg = X86::EDX; LoReg = HiReg = 0;
3443-
break;
3444-
case X86::MULX64rr:
3445-
SrcReg = X86::RDX; LoReg = HiReg = 0;
3446-
break;
34473438
}
34483439

34493440
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
@@ -3457,66 +3448,43 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
34573448

34583449
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
34593450
N0, SDValue()).getValue(1);
3460-
SDValue ResHi, ResLo;
3461-
34623451
if (foldedLoad) {
34633452
SDValue Chain;
34643453
MachineSDNode *CNode = nullptr;
34653454
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
34663455
InFlag };
3467-
if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
3468-
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
3469-
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3470-
ResHi = SDValue(CNode, 0);
3471-
ResLo = SDValue(CNode, 1);
3472-
Chain = SDValue(CNode, 2);
3473-
InFlag = SDValue(CNode, 3);
3474-
} else {
3475-
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3476-
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3477-
Chain = SDValue(CNode, 0);
3478-
InFlag = SDValue(CNode, 1);
3479-
}
3456+
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
3457+
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
3458+
Chain = SDValue(CNode, 0);
3459+
InFlag = SDValue(CNode, 1);
34803460

34813461
// Update the chain.
34823462
ReplaceUses(N1.getValue(1), Chain);
34833463
// Record the mem-refs
34843464
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
34853465
} else {
34863466
SDValue Ops[] = { N1, InFlag };
3487-
if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
3488-
SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
3489-
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3490-
ResHi = SDValue(CNode, 0);
3491-
ResLo = SDValue(CNode, 1);
3492-
InFlag = SDValue(CNode, 2);
3493-
} else {
3494-
SDVTList VTs = CurDAG->getVTList(MVT::Glue);
3495-
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3496-
InFlag = SDValue(CNode, 0);
3497-
}
3467+
SDVTList VTs = CurDAG->getVTList(MVT::Glue);
3468+
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
3469+
InFlag = SDValue(CNode, 0);
34983470
}
34993471

35003472
// Copy the low half of the result, if it is needed.
35013473
if (!SDValue(Node, 0).use_empty()) {
3502-
if (!ResLo.getNode()) {
3503-
assert(LoReg && "Register for low half is not defined!");
3504-
ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
3505-
InFlag);
3506-
InFlag = ResLo.getValue(2);
3507-
}
3474+
assert(LoReg && "Register for low half is not defined!");
3475+
SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
3476+
NVT, InFlag);
3477+
InFlag = ResLo.getValue(2);
35083478
ReplaceUses(SDValue(Node, 0), ResLo);
35093479
LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
35103480
dbgs() << '\n');
35113481
}
35123482
// Copy the high half of the result, if it is needed.
35133483
if (!SDValue(Node, 1).use_empty()) {
3514-
if (!ResHi.getNode()) {
3515-
assert(HiReg && "Register for high half is not defined!");
3516-
ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
3517-
InFlag);
3518-
InFlag = ResHi.getValue(2);
3519-
}
3484+
assert(HiReg && "Register for high half is not defined!");
3485+
SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
3486+
NVT, InFlag);
3487+
InFlag = ResHi.getValue(2);
35203488
ReplaceUses(SDValue(Node, 1), ResHi);
35213489
LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
35223490
dbgs() << '\n');

llvm/test/CodeGen/X86/bmi2-x86_64.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ define i64 @mulx64(i64 %x, i64 %y, i64* %p) {
6868
; CHECK-LABEL: mulx64:
6969
; CHECK: # %bb.0:
7070
; CHECK-NEXT: movq %rdx, %rcx
71-
; CHECK-NEXT: movq %rdi, %rdx
72-
; CHECK-NEXT: mulxq %rsi, %rax, %rdx
71+
; CHECK-NEXT: movq %rdi, %rax
72+
; CHECK-NEXT: mulq %rsi
7373
; CHECK-NEXT: movq %rdx, (%rcx)
7474
; CHECK-NEXT: retq
7575
%x1 = zext i64 %x to i128
@@ -86,8 +86,8 @@ define i64 @mulx64_load(i64 %x, i64* %y, i64* %p) {
8686
; CHECK-LABEL: mulx64_load:
8787
; CHECK: # %bb.0:
8888
; CHECK-NEXT: movq %rdx, %rcx
89-
; CHECK-NEXT: movq %rdi, %rdx
90-
; CHECK-NEXT: mulxq (%rsi), %rax, %rdx
89+
; CHECK-NEXT: movq %rdi, %rax
90+
; CHECK-NEXT: mulq (%rsi)
9191
; CHECK-NEXT: movq %rdx, (%rcx)
9292
; CHECK-NEXT: retq
9393
%y1 = load i64, i64* %y

llvm/test/CodeGen/X86/bmi2.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,11 @@ define i32 @mulx32(i32 %x, i32 %y, i32* %p) {
120120
; X86-LABEL: mulx32:
121121
; X86: # %bb.0:
122122
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
123-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
124123
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
125-
; X86-NEXT: addl %edx, %edx
124+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
126125
; X86-NEXT: addl %eax, %eax
127-
; X86-NEXT: mulxl %eax, %eax, %edx
126+
; X86-NEXT: addl %edx, %edx
127+
; X86-NEXT: mull %edx
128128
; X86-NEXT: movl %edx, (%ecx)
129129
; X86-NEXT: retl
130130
;
@@ -156,10 +156,10 @@ define i32 @mulx32_load(i32 %x, i32* %y, i32* %p) {
156156
; X86-LABEL: mulx32_load:
157157
; X86: # %bb.0:
158158
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
159-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
160159
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
161-
; X86-NEXT: addl %edx, %edx
162-
; X86-NEXT: mulxl (%eax), %eax, %edx
160+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
161+
; X86-NEXT: addl %eax, %eax
162+
; X86-NEXT: mull (%edx)
163163
; X86-NEXT: movl %edx, (%ecx)
164164
; X86-NEXT: retl
165165
;

0 commit comments

Comments
 (0)