Skip to content

Commit 1dd8cf1

Browse files
committed
[SelectionDAG][X86] Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
1 parent 1acbd2b commit 1dd8cf1

File tree

3 files changed

+126
-154
lines changed

3 files changed

+126
-154
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3867,6 +3867,63 @@ static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
38673867
return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
38683868
}
38693869

3870+
// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
3871+
static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG) {
3872+
assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
3873+
SDValue Sub0 = N->getOperand(0);
3874+
SDValue Sub1 = N->getOperand(1);
3875+
SDLoc DL(N);
3876+
3877+
auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
3878+
if ((DivRem.getOpcode() == ISD::SDIVREM ||
3879+
DivRem.getOpcode() == ISD::UDIVREM) &&
3880+
DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
3881+
DivRem.getOperand(1) == MaybeY) {
3882+
return SDValue(DivRem.getNode(), 1);
3883+
}
3884+
return SDValue();
3885+
};
3886+
3887+
if (Sub1.getOpcode() == ISD::MUL) {
3888+
// (sub x, (mul divrem(x,y)[0], y))
3889+
SDValue Mul0 = Sub1.getOperand(0);
3890+
SDValue Mul1 = Sub1.getOperand(1);
3891+
3892+
SDValue Res = CheckAndFoldMulCase(Mul0, Mul1);
3893+
if (Res)
3894+
return Res;
3895+
3896+
Res = CheckAndFoldMulCase(Mul1, Mul0);
3897+
if (Res)
3898+
return Res;
3899+
3900+
} else if (Sub1.getOpcode() == ISD::SHL) {
3901+
// Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
3902+
SDValue Shl0 = Sub1.getOperand(0);
3903+
SDValue Shl1 = Sub1.getOperand(1);
3904+
// Check if Shl0 is divrem(x, Y)[0]
3905+
if ((Shl0.getOpcode() == ISD::SDIVREM ||
3906+
Shl0.getOpcode() == ISD::UDIVREM) &&
3907+
Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {
3908+
3909+
SDValue Divisor = Shl0.getOperand(1);
3910+
3911+
// Check if DivRemDivisor is a constant power of 2
3912+
ConstantSDNode *DivC = isConstOrConstSplat(Divisor);
3913+
ConstantSDNode *ShC = isConstOrConstSplat(Shl1);
3914+
if (!DivC || !ShC) {
3915+
return SDValue();
3916+
}
3917+
3918+
if (DivC->getAPIntValue().isPowerOf2() &&
3919+
DivC->getAPIntValue() == ShC->getAPIntValue().logBase2()) {
3920+
return SDValue(Shl0.getNode(), 1);
3921+
}
3922+
}
3923+
}
3924+
return SDValue();
3925+
}
3926+
38703927
// Since it may not be valid to emit a fold to zero for vector initializers
38713928
// check if we can before folding.
38723929
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
@@ -4094,6 +4151,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
40944151
if (SDValue V = foldSubToUSubSat(VT, N, DL))
40954152
return V;
40964153

4154+
if (SDValue V = foldRemainderIdiom(N, DAG))
4155+
return V;
4156+
40974157
// (A - B) - 1 -> add (xor B, -1), A
40984158
if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
40994159
return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));

llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll

Lines changed: 33 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -11,25 +11,20 @@
1111
define i8 @scalar_i8(i8 %x, i8 %y, ptr %divdst) nounwind {
1212
; X86-LABEL: scalar_i8:
1313
; X86: # %bb.0:
14+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
15+
; X86-NEXT: idivb {{[0-9]+}}(%esp)
1416
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
15-
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
16-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
17-
; X86-NEXT: movsbl %cl, %eax
18-
; X86-NEXT: idivb %ch
17+
; X86-NEXT: movsbl %ah, %ecx
1918
; X86-NEXT: movb %al, (%edx)
20-
; X86-NEXT: mulb %ch
21-
; X86-NEXT: subb %al, %cl
2219
; X86-NEXT: movl %ecx, %eax
2320
; X86-NEXT: retl
2421
;
2522
; X64-LABEL: scalar_i8:
2623
; X64: # %bb.0:
27-
; X64-NEXT: movsbl %dil, %ecx
28-
; X64-NEXT: movl %ecx, %eax
24+
; X64-NEXT: movsbl %dil, %eax
2925
; X64-NEXT: idivb %sil
26+
; X64-NEXT: movsbl %ah, %ecx
3027
; X64-NEXT: movb %al, (%rdx)
31-
; X64-NEXT: mulb %sil
32-
; X64-NEXT: subb %al, %cl
3328
; X64-NEXT: movl %ecx, %eax
3429
; X64-NEXT: retq
3530
%div = sdiv i8 %x, %y
@@ -42,34 +37,23 @@ define i8 @scalar_i8(i8 %x, i8 %y, ptr %divdst) nounwind {
4237
define i16 @scalar_i16(i16 %x, i16 %y, ptr %divdst) nounwind {
4338
; X86-LABEL: scalar_i16:
4439
; X86: # %bb.0:
45-
; X86-NEXT: pushl %edi
46-
; X86-NEXT: pushl %esi
47-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
48-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
49-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
50-
; X86-NEXT: movl %ecx, %eax
40+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
5141
; X86-NEXT: cwtd
52-
; X86-NEXT: idivw %si
53-
; X86-NEXT: # kill: def $ax killed $ax def $eax
54-
; X86-NEXT: movw %ax, (%edi)
55-
; X86-NEXT: imull %eax, %esi
56-
; X86-NEXT: subl %esi, %ecx
57-
; X86-NEXT: movl %ecx, %eax
58-
; X86-NEXT: popl %esi
59-
; X86-NEXT: popl %edi
42+
; X86-NEXT: idivw {{[0-9]+}}(%esp)
43+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
44+
; X86-NEXT: movw %ax, (%ecx)
45+
; X86-NEXT: movl %edx, %eax
6046
; X86-NEXT: retl
6147
;
6248
; X64-LABEL: scalar_i16:
6349
; X64: # %bb.0:
6450
; X64-NEXT: movq %rdx, %rcx
6551
; X64-NEXT: movl %edi, %eax
52+
; X64-NEXT: # kill: def $ax killed $ax killed $eax
6653
; X64-NEXT: cwtd
6754
; X64-NEXT: idivw %si
68-
; X64-NEXT: # kill: def $ax killed $ax def $eax
6955
; X64-NEXT: movw %ax, (%rcx)
70-
; X64-NEXT: imull %eax, %esi
71-
; X64-NEXT: subl %esi, %edi
72-
; X64-NEXT: movl %edi, %eax
56+
; X64-NEXT: movl %edx, %eax
7357
; X64-NEXT: retq
7458
%div = sdiv i16 %x, %y
7559
store i16 %div, ptr %divdst, align 4
@@ -81,20 +65,12 @@ define i16 @scalar_i16(i16 %x, i16 %y, ptr %divdst) nounwind {
8165
define i32 @scalar_i32(i32 %x, i32 %y, ptr %divdst) nounwind {
8266
; X86-LABEL: scalar_i32:
8367
; X86: # %bb.0:
84-
; X86-NEXT: pushl %edi
85-
; X86-NEXT: pushl %esi
86-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
87-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
88-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
89-
; X86-NEXT: movl %ecx, %eax
68+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
9069
; X86-NEXT: cltd
91-
; X86-NEXT: idivl %edi
92-
; X86-NEXT: movl %eax, (%esi)
93-
; X86-NEXT: imull %edi, %eax
94-
; X86-NEXT: subl %eax, %ecx
95-
; X86-NEXT: movl %ecx, %eax
96-
; X86-NEXT: popl %esi
97-
; X86-NEXT: popl %edi
70+
; X86-NEXT: idivl {{[0-9]+}}(%esp)
71+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
72+
; X86-NEXT: movl %eax, (%ecx)
73+
; X86-NEXT: movl %edx, %eax
9874
; X86-NEXT: retl
9975
;
10076
; X64-LABEL: scalar_i32:
@@ -104,9 +80,7 @@ define i32 @scalar_i32(i32 %x, i32 %y, ptr %divdst) nounwind {
10480
; X64-NEXT: cltd
10581
; X64-NEXT: idivl %esi
10682
; X64-NEXT: movl %eax, (%rcx)
107-
; X64-NEXT: imull %esi, %eax
108-
; X64-NEXT: subl %eax, %edi
109-
; X64-NEXT: movl %edi, %eax
83+
; X64-NEXT: movl %edx, %eax
11084
; X64-NEXT: retq
11185
%div = sdiv i32 %x, %y
11286
store i32 %div, ptr %divdst, align 4
@@ -158,9 +132,7 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind {
158132
; X64-NEXT: cqto
159133
; X64-NEXT: idivq %rsi
160134
; X64-NEXT: movq %rax, (%rcx)
161-
; X64-NEXT: imulq %rsi, %rax
162-
; X64-NEXT: subq %rax, %rdi
163-
; X64-NEXT: movq %rdi, %rax
135+
; X64-NEXT: movq %rdx, %rax
164136
; X64-NEXT: retq
165137
%div = sdiv i64 %x, %y
166138
store i64 %div, ptr %divdst, align 4
@@ -1199,34 +1171,23 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, ptr %divdst) nounw
11991171
define i32 @scalar_i32_commutative(i32 %x, ptr %ysrc, ptr %divdst) nounwind {
12001172
; X86-LABEL: scalar_i32_commutative:
12011173
; X86: # %bb.0:
1202-
; X86-NEXT: pushl %edi
1203-
; X86-NEXT: pushl %esi
1204-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1205-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
12061174
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1207-
; X86-NEXT: movl (%eax), %edi
1208-
; X86-NEXT: movl %ecx, %eax
1175+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
12091176
; X86-NEXT: cltd
1210-
; X86-NEXT: idivl %edi
1211-
; X86-NEXT: movl %eax, (%esi)
1212-
; X86-NEXT: imull %eax, %edi
1213-
; X86-NEXT: subl %edi, %ecx
1214-
; X86-NEXT: movl %ecx, %eax
1215-
; X86-NEXT: popl %esi
1216-
; X86-NEXT: popl %edi
1177+
; X86-NEXT: idivl (%ecx)
1178+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1179+
; X86-NEXT: movl %eax, (%ecx)
1180+
; X86-NEXT: movl %edx, %eax
12171181
; X86-NEXT: retl
12181182
;
12191183
; X64-LABEL: scalar_i32_commutative:
12201184
; X64: # %bb.0:
12211185
; X64-NEXT: movq %rdx, %rcx
1222-
; X64-NEXT: movl (%rsi), %esi
12231186
; X64-NEXT: movl %edi, %eax
12241187
; X64-NEXT: cltd
1225-
; X64-NEXT: idivl %esi
1188+
; X64-NEXT: idivl (%rsi)
12261189
; X64-NEXT: movl %eax, (%rcx)
1227-
; X64-NEXT: imull %eax, %esi
1228-
; X64-NEXT: subl %esi, %edi
1229-
; X64-NEXT: movl %edi, %eax
1190+
; X64-NEXT: movl %edx, %eax
12301191
; X64-NEXT: retq
12311192
%y = load i32, ptr %ysrc, align 4
12321193
%div = sdiv i32 %x, %y
@@ -1240,24 +1201,20 @@ define i32 @scalar_i32_commutative(i32 %x, ptr %ysrc, ptr %divdst) nounwind {
12401201
define i32 @extrause(i32 %x, i32 %y, ptr %divdst, ptr %t1dst) nounwind {
12411202
; X86-LABEL: extrause:
12421203
; X86: # %bb.0:
1243-
; X86-NEXT: pushl %ebx
12441204
; X86-NEXT: pushl %edi
12451205
; X86-NEXT: pushl %esi
1246-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1247-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1206+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
12481207
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1249-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
1250-
; X86-NEXT: movl %ecx, %eax
12511208
; X86-NEXT: cltd
1252-
; X86-NEXT: idivl %ebx
1209+
; X86-NEXT: idivl %ecx
1210+
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1211+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
12531212
; X86-NEXT: movl %eax, (%edi)
1254-
; X86-NEXT: imull %ebx, %eax
1213+
; X86-NEXT: imull %ecx, %eax
12551214
; X86-NEXT: movl %eax, (%esi)
1256-
; X86-NEXT: subl %eax, %ecx
1257-
; X86-NEXT: movl %ecx, %eax
1215+
; X86-NEXT: movl %edx, %eax
12581216
; X86-NEXT: popl %esi
12591217
; X86-NEXT: popl %edi
1260-
; X86-NEXT: popl %ebx
12611218
; X86-NEXT: retl
12621219
;
12631220
; X64-LABEL: extrause:
@@ -1269,8 +1226,7 @@ define i32 @extrause(i32 %x, i32 %y, ptr %divdst, ptr %t1dst) nounwind {
12691226
; X64-NEXT: movl %eax, (%r8)
12701227
; X64-NEXT: imull %esi, %eax
12711228
; X64-NEXT: movl %eax, (%rcx)
1272-
; X64-NEXT: subl %eax, %edi
1273-
; X64-NEXT: movl %edi, %eax
1229+
; X64-NEXT: movl %edx, %eax
12741230
; X64-NEXT: retq
12751231
%div = sdiv i32 %x, %y
12761232
store i32 %div, ptr %divdst, align 4

0 commit comments

Comments
 (0)