Skip to content

Commit 63f1e70

Browse files
committed
[SelectionDAG][X86] Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
1 parent c2ef371 commit 63f1e70

File tree

3 files changed

+131
-172
lines changed

3 files changed

+131
-172
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3867,6 +3867,61 @@ static SDValue foldSubCtlzNot(SDNode *N, SelectionDAG &DAG) {
38673867
return Matcher.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, Not);
38683868
}
38693869

3870+
// Fold sub(x, mul(divrem(x,y)[0], y)) to divrem(x, y)[1]
//
// N must be an ISD::SUB node (asserted below). When its second operand is a
// MUL (operands in either order) or a SHL by a constant that recomputes
// quotient * divisor from an SDIVREM/UDIVREM of the same x, the SUB computes
// x - (x / y) * y, i.e. the remainder, so result 1 of that existing divrem
// node is returned in its place. Returns an empty SDValue when nothing
// matches. No new nodes are created; DAG and DL are not referenced in the
// body.
3871+
static SDValue foldRemainderIdiom(SDNode *N, SelectionDAG &DAG, SDLoc &DL) {
3872+
assert(N->getOpcode() == ISD::SUB && "Node must be a SUB");
3873+
SDValue Sub0 = N->getOperand(0);
3874+
SDValue Sub1 = N->getOperand(1);
3875+
3876+
// Returns result 1 of DivRem's node when DivRem is result 0 of an
// SDIVREM/UDIVREM whose operands are (Sub0, MaybeY); otherwise returns an
// empty SDValue.
auto CheckAndFoldMulCase = [&](SDValue DivRem, SDValue MaybeY) -> SDValue {
3877+
if ((DivRem.getOpcode() == ISD::SDIVREM ||
3878+
DivRem.getOpcode() == ISD::UDIVREM) &&
3879+
DivRem.getResNo() == 0 && DivRem.getOperand(0) == Sub0 &&
3880+
DivRem.getOperand(1) == MaybeY) {
3881+
return SDValue(DivRem.getNode(), 1);
3882+
}
3883+
return SDValue();
3884+
};
3885+
3886+
if (Sub1.getOpcode() == ISD::MUL) {
3887+
// (sub x, (mul divrem(x,y)[0], y))
3888+
SDValue Mul0 = Sub1.getOperand(0);
3889+
SDValue Mul1 = Sub1.getOperand(1);
3890+
3891+
// The quotient may be either MUL operand, so try it on the left first and
// then on the right.
SDValue Res = CheckAndFoldMulCase(Mul0, Mul1);
3892+
if (Res)
3893+
return Res;
3894+
3895+
Res = CheckAndFoldMulCase(Mul1, Mul0);
3896+
if (Res)
3897+
return Res;
3898+
3899+
} else if (Sub1.getOpcode() == ISD::SHL) {
3900+
// Handle (sub x, (shl divrem(x,y)[0], C)) where y = 1 << C
3901+
SDValue Shl0 = Sub1.getOperand(0);
3902+
SDValue Shl1 = Sub1.getOperand(1);
3903+
// Check if Shl0 is divrem(x, Y)[0]
3904+
if ((Shl0.getOpcode() == ISD::SDIVREM ||
3905+
Shl0.getOpcode() == ISD::UDIVREM) &&
3906+
Shl0.getResNo() == 0 && Shl0.getOperand(0) == Sub0) {
3907+
3908+
SDValue Divisor = Shl0.getOperand(1);
3909+
3910+
// Both the divisor and the shift amount must be constants (or constant
// splats, going by the helper's name); a null result means no match.
ConstantSDNode *DivC = isConstOrConstSplat(Divisor);
3911+
ConstantSDNode *ShC = isConstOrConstSplat(Shl1);
3912+
if (!DivC || !ShC) {
3913+
return SDValue();
3914+
}
3915+
3916+
// shl q, C equals q * y only when y is exactly 1 << C, so require a
// power-of-two divisor whose log2 matches the shift amount.
if (DivC->getAPIntValue().isPowerOf2() &&
3917+
DivC->getAPIntValue().logBase2() == ShC->getAPIntValue()) {
3918+
return SDValue(Shl0.getNode(), 1);
3919+
}
3920+
}
3921+
}
3922+
// No remainder idiom recognized.
return SDValue();
3923+
}
3924+
38703925
// Since it may not be valid to emit a fold to zero for vector initializers
38713926
// check if we can before folding.
38723927
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
@@ -4094,6 +4149,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
40944149
if (SDValue V = foldSubToUSubSat(VT, N, DL))
40954150
return V;
40964151

4152+
if (SDValue V = foldRemainderIdiom(N, DAG, DL))
4153+
return V;
4154+
40974155
// (A - B) - 1 -> add (xor B, -1), A
40984156
if (sd_match(N, m_Sub(m_OneUse(m_Sub(m_Value(A), m_Value(B))), m_One())))
40994157
return DAG.getNode(ISD::ADD, DL, VT, A, DAG.getNOT(DL, B, VT));

llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll

Lines changed: 40 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -11,25 +11,20 @@
1111
define i8 @scalar_i8(i8 %x, i8 %y, ptr %divdst) nounwind {
1212
; X86-LABEL: scalar_i8:
1313
; X86: # %bb.0:
14+
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
15+
; X86-NEXT: idivb {{[0-9]+}}(%esp)
1416
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
15-
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
16-
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
17-
; X86-NEXT: movsbl %cl, %eax
18-
; X86-NEXT: idivb %ch
17+
; X86-NEXT: movsbl %ah, %ecx
1918
; X86-NEXT: movb %al, (%edx)
20-
; X86-NEXT: mulb %ch
21-
; X86-NEXT: subb %al, %cl
2219
; X86-NEXT: movl %ecx, %eax
2320
; X86-NEXT: retl
2421
;
2522
; X64-LABEL: scalar_i8:
2623
; X64: # %bb.0:
27-
; X64-NEXT: movsbl %dil, %ecx
28-
; X64-NEXT: movl %ecx, %eax
24+
; X64-NEXT: movsbl %dil, %eax
2925
; X64-NEXT: idivb %sil
26+
; X64-NEXT: movsbl %ah, %ecx
3027
; X64-NEXT: movb %al, (%rdx)
31-
; X64-NEXT: mulb %sil
32-
; X64-NEXT: subb %al, %cl
3328
; X64-NEXT: movl %ecx, %eax
3429
; X64-NEXT: retq
3530
%div = sdiv i8 %x, %y
@@ -42,34 +37,23 @@ define i8 @scalar_i8(i8 %x, i8 %y, ptr %divdst) nounwind {
4237
define i16 @scalar_i16(i16 %x, i16 %y, ptr %divdst) nounwind {
4338
; X86-LABEL: scalar_i16:
4439
; X86: # %bb.0:
45-
; X86-NEXT: pushl %edi
46-
; X86-NEXT: pushl %esi
47-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
48-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
49-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
50-
; X86-NEXT: movl %ecx, %eax
40+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
5141
; X86-NEXT: cwtd
52-
; X86-NEXT: idivw %si
53-
; X86-NEXT: # kill: def $ax killed $ax def $eax
54-
; X86-NEXT: movw %ax, (%edi)
55-
; X86-NEXT: imull %eax, %esi
56-
; X86-NEXT: subl %esi, %ecx
57-
; X86-NEXT: movl %ecx, %eax
58-
; X86-NEXT: popl %esi
59-
; X86-NEXT: popl %edi
42+
; X86-NEXT: idivw {{[0-9]+}}(%esp)
43+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
44+
; X86-NEXT: movw %ax, (%ecx)
45+
; X86-NEXT: movl %edx, %eax
6046
; X86-NEXT: retl
6147
;
6248
; X64-LABEL: scalar_i16:
6349
; X64: # %bb.0:
6450
; X64-NEXT: movq %rdx, %rcx
6551
; X64-NEXT: movl %edi, %eax
52+
; X64-NEXT: # kill: def $ax killed $ax killed $eax
6653
; X64-NEXT: cwtd
6754
; X64-NEXT: idivw %si
68-
; X64-NEXT: # kill: def $ax killed $ax def $eax
6955
; X64-NEXT: movw %ax, (%rcx)
70-
; X64-NEXT: imull %eax, %esi
71-
; X64-NEXT: subl %esi, %edi
72-
; X64-NEXT: movl %edi, %eax
56+
; X64-NEXT: movl %edx, %eax
7357
; X64-NEXT: retq
7458
%div = sdiv i16 %x, %y
7559
store i16 %div, ptr %divdst, align 4
@@ -81,20 +65,12 @@ define i16 @scalar_i16(i16 %x, i16 %y, ptr %divdst) nounwind {
8165
define i32 @scalar_i32(i32 %x, i32 %y, ptr %divdst) nounwind {
8266
; X86-LABEL: scalar_i32:
8367
; X86: # %bb.0:
84-
; X86-NEXT: pushl %edi
85-
; X86-NEXT: pushl %esi
86-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
87-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
88-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
89-
; X86-NEXT: movl %ecx, %eax
68+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
9069
; X86-NEXT: cltd
91-
; X86-NEXT: idivl %edi
92-
; X86-NEXT: movl %eax, (%esi)
93-
; X86-NEXT: imull %edi, %eax
94-
; X86-NEXT: subl %eax, %ecx
95-
; X86-NEXT: movl %ecx, %eax
96-
; X86-NEXT: popl %esi
97-
; X86-NEXT: popl %edi
70+
; X86-NEXT: idivl {{[0-9]+}}(%esp)
71+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
72+
; X86-NEXT: movl %eax, (%ecx)
73+
; X86-NEXT: movl %edx, %eax
9874
; X86-NEXT: retl
9975
;
10076
; X64-LABEL: scalar_i32:
@@ -104,9 +80,7 @@ define i32 @scalar_i32(i32 %x, i32 %y, ptr %divdst) nounwind {
10480
; X64-NEXT: cltd
10581
; X64-NEXT: idivl %esi
10682
; X64-NEXT: movl %eax, (%rcx)
107-
; X64-NEXT: imull %esi, %eax
108-
; X64-NEXT: subl %eax, %edi
109-
; X64-NEXT: movl %edi, %eax
83+
; X64-NEXT: movl %edx, %eax
11084
; X64-NEXT: retq
11185
%div = sdiv i32 %x, %y
11286
store i32 %div, ptr %divdst, align 4
@@ -158,9 +132,7 @@ define i64 @scalar_i64(i64 %x, i64 %y, ptr %divdst) nounwind {
158132
; X64-NEXT: cqto
159133
; X64-NEXT: idivq %rsi
160134
; X64-NEXT: movq %rax, (%rcx)
161-
; X64-NEXT: imulq %rsi, %rax
162-
; X64-NEXT: subq %rax, %rdi
163-
; X64-NEXT: movq %rdi, %rax
135+
; X64-NEXT: movq %rdx, %rax
164136
; X64-NEXT: retq
165137
%div = sdiv i64 %x, %y
166138
store i64 %div, ptr %divdst, align 4
@@ -1197,32 +1169,21 @@ define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y, ptr %divdst) nounw
11971169
define i32 @scalar_i32_const_pow2_divisor(i32 %0, ptr %1) minsize nounwind {
11981170
; X86-LABEL: scalar_i32_const_pow2_divisor:
11991171
; X86: # %bb.0:
1200-
; X86-NEXT: pushl %edi
1201-
; X86-NEXT: pushl %esi
1202-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1203-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1204-
; X86-NEXT: movl $256, %edi # imm = 0x100
1205-
; X86-NEXT: movl %ecx, %eax
1172+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1173+
; X86-NEXT: movl $256, %ecx # imm = 0x100
12061174
; X86-NEXT: cltd
1207-
; X86-NEXT: idivl %edi
1208-
; X86-NEXT: movl %eax, %edx
1209-
; X86-NEXT: shll $8, %edx
1210-
; X86-NEXT: subl %edx, %ecx
1211-
; X86-NEXT: movl %ecx, (%esi)
1212-
; X86-NEXT: popl %esi
1213-
; X86-NEXT: popl %edi
1175+
; X86-NEXT: idivl %ecx
1176+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1177+
; X86-NEXT: movl %edx, (%ecx)
12141178
; X86-NEXT: retl
12151179
;
12161180
; X64-LABEL: scalar_i32_const_pow2_divisor:
12171181
; X64: # %bb.0:
1218-
; X64-NEXT: movl $256, %ecx # imm = 0x100
12191182
; X64-NEXT: movl %edi, %eax
1183+
; X64-NEXT: movl $256, %ecx # imm = 0x100
12201184
; X64-NEXT: cltd
12211185
; X64-NEXT: idivl %ecx
1222-
; X64-NEXT: movl %eax, %ecx
1223-
; X64-NEXT: shll $8, %ecx
1224-
; X64-NEXT: subl %ecx, %edi
1225-
; X64-NEXT: movl %edi, (%rsi)
1186+
; X64-NEXT: movl %edx, (%rsi)
12261187
; X64-NEXT: retq
12271188
%3 = srem i32 %0, 256
12281189
store i32 %3, ptr %1, align 4
@@ -1235,34 +1196,23 @@ define i32 @scalar_i32_const_pow2_divisor(i32 %0, ptr %1) minsize nounwind {
12351196
define i32 @scalar_i32_commutative(i32 %x, ptr %ysrc, ptr %divdst) nounwind {
12361197
; X86-LABEL: scalar_i32_commutative:
12371198
; X86: # %bb.0:
1238-
; X86-NEXT: pushl %edi
1239-
; X86-NEXT: pushl %esi
1240-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1241-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
12421199
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1243-
; X86-NEXT: movl (%eax), %edi
1244-
; X86-NEXT: movl %ecx, %eax
1200+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
12451201
; X86-NEXT: cltd
1246-
; X86-NEXT: idivl %edi
1247-
; X86-NEXT: movl %eax, (%esi)
1248-
; X86-NEXT: imull %eax, %edi
1249-
; X86-NEXT: subl %edi, %ecx
1250-
; X86-NEXT: movl %ecx, %eax
1251-
; X86-NEXT: popl %esi
1252-
; X86-NEXT: popl %edi
1202+
; X86-NEXT: idivl (%ecx)
1203+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1204+
; X86-NEXT: movl %eax, (%ecx)
1205+
; X86-NEXT: movl %edx, %eax
12531206
; X86-NEXT: retl
12541207
;
12551208
; X64-LABEL: scalar_i32_commutative:
12561209
; X64: # %bb.0:
12571210
; X64-NEXT: movq %rdx, %rcx
1258-
; X64-NEXT: movl (%rsi), %esi
12591211
; X64-NEXT: movl %edi, %eax
12601212
; X64-NEXT: cltd
1261-
; X64-NEXT: idivl %esi
1213+
; X64-NEXT: idivl (%rsi)
12621214
; X64-NEXT: movl %eax, (%rcx)
1263-
; X64-NEXT: imull %eax, %esi
1264-
; X64-NEXT: subl %esi, %edi
1265-
; X64-NEXT: movl %edi, %eax
1215+
; X64-NEXT: movl %edx, %eax
12661216
; X64-NEXT: retq
12671217
%y = load i32, ptr %ysrc, align 4
12681218
%div = sdiv i32 %x, %y
@@ -1276,24 +1226,20 @@ define i32 @scalar_i32_commutative(i32 %x, ptr %ysrc, ptr %divdst) nounwind {
12761226
define i32 @extrause(i32 %x, i32 %y, ptr %divdst, ptr %t1dst) nounwind {
12771227
; X86-LABEL: extrause:
12781228
; X86: # %bb.0:
1279-
; X86-NEXT: pushl %ebx
12801229
; X86-NEXT: pushl %edi
12811230
; X86-NEXT: pushl %esi
1282-
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1283-
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
1231+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
12841232
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1285-
; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
1286-
; X86-NEXT: movl %ecx, %eax
12871233
; X86-NEXT: cltd
1288-
; X86-NEXT: idivl %ebx
1234+
; X86-NEXT: idivl %ecx
1235+
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
1236+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
12891237
; X86-NEXT: movl %eax, (%edi)
1290-
; X86-NEXT: imull %ebx, %eax
1238+
; X86-NEXT: imull %ecx, %eax
12911239
; X86-NEXT: movl %eax, (%esi)
1292-
; X86-NEXT: subl %eax, %ecx
1293-
; X86-NEXT: movl %ecx, %eax
1240+
; X86-NEXT: movl %edx, %eax
12941241
; X86-NEXT: popl %esi
12951242
; X86-NEXT: popl %edi
1296-
; X86-NEXT: popl %ebx
12971243
; X86-NEXT: retl
12981244
;
12991245
; X64-LABEL: extrause:
@@ -1305,8 +1251,7 @@ define i32 @extrause(i32 %x, i32 %y, ptr %divdst, ptr %t1dst) nounwind {
13051251
; X64-NEXT: movl %eax, (%r8)
13061252
; X64-NEXT: imull %esi, %eax
13071253
; X64-NEXT: movl %eax, (%rcx)
1308-
; X64-NEXT: subl %eax, %edi
1309-
; X64-NEXT: movl %edi, %eax
1254+
; X64-NEXT: movl %edx, %eax
13101255
; X64-NEXT: retq
13111256
%div = sdiv i32 %x, %y
13121257
store i32 %div, ptr %divdst, align 4

0 commit comments

Comments
 (0)